Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
//===- InferAddressSpaces.cpp - Infer address spaces ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __global__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared`, which is roughly
// 10% faster than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero (so-called) specific address spaces to represent
// memory spaces (e.g. addrspace(3) means shared memory). The Clang frontend
// places only type-qualified variables in specific address spaces, and then
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
// (the so-called generic address space) for other instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX codegen
// is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it uses a data-flow
// analysis to infer as many generic pointers as possible to point to only one
// specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data flow graph. This brings two complications.
//
// First, the data flow analysis in Step 1 needs to be circular. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y. To address this complication,
// the data flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
// All address expressions (our implementation only considers phi, bitcast,
// addrspacecast, and getelementptr) start with the uninitialized address space.
// The monotone transfer function moves the address space of a pointer down a
// lattice path from uninitialized to specific and then to generic. A join
// operation of two different specific address spaces pushes the expression down
// to the generic address space. The analysis completes once it reaches a fixed
// point.
//
// Second, IR rewriting in Step 2 also needs to be circular. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
// we break these cycles using "poison" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
// yet, we let `I` temporarily use `poison` and fix all the uses later.
// For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, poison ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the poison in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
//===----------------------------------------------------------------------===//
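// A note on usage: the pass is registered under the name given by DEBUG_TYPE
// below ("infer-address-spaces"), so it can be exercised in isolation with,
// for example:
//   opt -S -passes=infer-address-spaces input.ll
// Targets that do not report a flat address space through TTI can, in tests,
// use the hidden -assume-default-is-flat-addrspace option defined below to
// treat addrspace(0) as the flat address space.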
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

#define DEBUG_TYPE "infer-address-spaces"

using namespace llvm;

static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
    "assume-default-is-flat-addrspace", cl::init(false), cl::ReallyHidden,
    cl::desc("The default address space is assumed as the flat address space. "
             "This is mainly for test purpose."));

static const unsigned UninitializedAddressSpace =
    std::numeric_limits<unsigned>::max();

namespace {

using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
// the *def* of a value, PredicatedAddrSpaceMapTy is a map where a new
// addrspace is inferred on the *use* of a pointer. This map is introduced to
// infer addrspace from the addrspace predicate assumption built from the
// assume intrinsic. In that scenario, only specific uses (under a valid
// assumption context) can be inferred with a new addrspace.
using PredicatedAddrSpaceMapTy =
    DenseMap<std::pair<const Value *, const Value *>, unsigned>;
using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;

class InferAddressSpaces : public FunctionPass {
  unsigned FlatAddrSpace = 0;

public:
  static char ID;

  InferAddressSpaces()
      : FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {
    initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
  }
  InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {
    initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;
};

class InferAddressSpacesImpl {
  AssumptionCache &AC;
  const DominatorTree *DT = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  const DataLayout *DL = nullptr;

  /// Target-specific address space, uses of which should be replaced if
  /// possible.
  unsigned FlatAddrSpace = 0;

  // Tries to update the address space of V. Returns true if V is updated, and
  // false otherwise.
  bool updateAddressSpace(const Value &V,
                          ValueToAddrSpaceMapTy &InferredAddrSpace,
                          PredicatedAddrSpaceMapTy &PredicatedAS) const;

  // Tries to infer the specific address space of each address expression in
  // Postorder.
  void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                          ValueToAddrSpaceMapTy &InferredAddrSpace,
                          PredicatedAddrSpaceMapTy &PredicatedAS) const;

  bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;

  Value *cloneInstructionWithNewAddressSpace(
      Instruction *I, unsigned NewAddrSpace,
      const ValueToValueMapTy &ValueWithNewAddrSpace,
      const PredicatedAddrSpaceMapTy &PredicatedAS,
      SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
  // Changes the flat address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all flat expressions in the use-def graph of function F.
  bool
  rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
                              const PredicatedAddrSpaceMapTy &PredicatedAS,
                              Function *F) const;

  void appendsFlatAddressExpressionToPostorderStack(
      Value *V, PostorderStackTy &PostorderStack,
      DenseSet<Value *> &Visited) const;

  bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV,
                                Value *NewV) const;
  void collectRewritableIntrinsicOperands(IntrinsicInst *II,
                                          PostorderStackTy &PostorderStack,
                                          DenseSet<Value *> &Visited) const;

  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;

  Value *cloneValueWithNewAddressSpace(
      Value *V, unsigned NewAddrSpace,
      const ValueToValueMapTy &ValueWithNewAddrSpace,
      const PredicatedAddrSpaceMapTy &PredicatedAS,
      SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
  unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;

  unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const;

public:
  InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT,
                         const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
      : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
  bool run(Function &F);
};

} // end anonymous namespace

char InferAddressSpaces::ID = 0;

INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                    false, false)

static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) {
  assert(Ty->isPtrOrPtrVectorTy());
  PointerType *NPT = PointerType::get(Ty->getContext(), NewAddrSpace);
  return Ty->getWithNewType(NPT);
}

// Check whether that's a no-op pointer bitcast expressed as a pair of
// `ptrtoint`/`inttoptr`, which is used because there is no no-op pointer
// bitcast over different address spaces.
static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
                                 const TargetTransformInfo *TTI) {
  assert(I2P->getOpcode() == Instruction::IntToPtr);
  auto *P2I = dyn_cast<Operator>(I2P->getOperand(0));
  if (!P2I || P2I->getOpcode() != Instruction::PtrToInt)
    return false;
  // Check it's really safe to treat that pair of `ptrtoint`/`inttoptr` as a
  // no-op cast. Besides checking both of them are no-op casts, as the
  // reinterpreted pointer may be used in other pointer arithmetic, we also
  // need to double-check that through the target-specific hook. That ensures
  // the underlying target also agrees that's a no-op address space cast and
  // pointer bits are preserved.
  // The current IR spec doesn't have clear rules on address space casts,
  // especially a clear definition for pointer bits in non-default address
  // spaces. It would be undefined if that pointer is dereferenced after an
  // invalid reinterpret cast. Also, because the current spec does not clearly
  // define the meaning of pointer bits in non-default address spaces, pointer
  // arithmetic may be undefined after an invalid pointer reinterpret cast.
  // However, as we confirm through the target hooks that it's a no-op
  // addrspacecast, it doesn't matter since the bits should be the same.
  unsigned P2IOp0AS = P2I->getOperand(0)->getType()->getPointerAddressSpace();
  unsigned I2PAS = I2P->getType()->getPointerAddressSpace();
  return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()),
                              I2P->getOperand(0)->getType(), I2P->getType(),
                              DL) &&
         CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()),
                              P2I->getOperand(0)->getType(), P2I->getType(),
                              DL) &&
         (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(P2IOp0AS, I2PAS));
}
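// For illustration, assuming a target for which isNoopAddrSpaceCast(1, 0)
// returns true, the pair
//   %i = ptrtoint ptr addrspace(1) %p to i64
//   %q = inttoptr i64 %i to ptr
// is treated by this pass as if it were
//   %q = addrspacecast ptr addrspace(1) %p to ptr
// so that %q can participate in address space inference like any other cast.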
// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
// getelementptr operators.
static bool isAddressExpression(const Value &V, const DataLayout &DL,
                                const TargetTransformInfo *TTI) {
  const Operator *Op = dyn_cast<Operator>(&V);
  if (!Op)
    return false;

  switch (Op->getOpcode()) {
  case Instruction::PHI:
    assert(Op->getType()->isPtrOrPtrVectorTy());
    return true;
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return true;
  case Instruction::Select:
    return Op->getType()->isPtrOrPtrVectorTy();
  case Instruction::Call: {
    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
    return II && II->getIntrinsicID() == Intrinsic::ptrmask;
  }
  case Instruction::IntToPtr:
    return isNoopPtrIntCastPair(Op, DL, TTI);
  default:
    // That value is an address expression if it has an assumed address space.
    return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
  }
}

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2>
getPointerOperands(const Value &V, const DataLayout &DL,
                   const TargetTransformInfo *TTI) {
  const Operator &Op = cast<Operator>(V);
  switch (Op.getOpcode()) {
  case Instruction::PHI: {
    auto IncomingValues = cast<PHINode>(Op).incoming_values();
    return {IncomingValues.begin(), IncomingValues.end()};
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return {Op.getOperand(0)};
  case Instruction::Select:
    return {Op.getOperand(1), Op.getOperand(2)};
  case Instruction::Call: {
    const IntrinsicInst &II = cast<IntrinsicInst>(Op);
    assert(II.getIntrinsicID() == Intrinsic::ptrmask &&
           "unexpected intrinsic call");
    return {II.getArgOperand(0)};
  }
  case Instruction::IntToPtr: {
    assert(isNoopPtrIntCastPair(&Op, DL, TTI));
    auto *P2I = cast<Operator>(Op.getOperand(0));
    return {P2I->getOperand(0)};
  }
  default:
    llvm_unreachable("Unexpected instruction type.");
  }
}

bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                      Value *OldV,
                                                      Value *NewV) const {
  Module *M = II->getParent()->getParent()->getParent();

  switch (II->getIntrinsicID()) {
  case Intrinsic::objectsize: {
    Type *DestTy = II->getType();
    Type *SrcTy = NewV->getType();
    Function *NewDecl =
        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
    II->setArgOperand(0, NewV);
    II->setCalledFunction(NewDecl);
    return true;
  }
  case Intrinsic::ptrmask:
    // This is handled as an address expression, not as a use memory operation.
    return false;
  case Intrinsic::masked_gather: {
    Type *RetTy = II->getType();
    Type *NewPtrTy = NewV->getType();
    Function *NewDecl =
        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
    II->setArgOperand(0, NewV);
    II->setCalledFunction(NewDecl);
    return true;
  }
  case Intrinsic::masked_scatter: {
    Type *ValueTy = II->getOperand(0)->getType();
    Type *NewPtrTy = NewV->getType();
    Function *NewDecl =
        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {ValueTy, NewPtrTy});
    II->setArgOperand(1, NewV);
    II->setCalledFunction(NewDecl);
    return true;
  }
  default: {
    Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
    if (!Rewrite)
      return false;
    if (Rewrite != II)
      II->replaceAllUsesWith(Rewrite);
    return true;
  }
  }
}
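// A rough sketch of the remangling performed above for a masked gather whose
// pointer vector was inferred to be in addrspace(3) (the IR below is
// illustrative, not taken from a test):
//   %v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(
//            <2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x float> %pt)
// becomes
//   %v = call <2 x float> @llvm.masked.gather.v2f32.v2p3(
//            <2 x ptr addrspace(3)> %ptrs.shared, i32 4, <2 x i1> %mask,
//            <2 x float> %pt)
// where only the overloaded pointer type (and hence the mangled name) changes.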
void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
    IntrinsicInst *II, PostorderStackTy &PostorderStack,
    DenseSet<Value *> &Visited) const {
  auto IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::ptrmask:
  case Intrinsic::objectsize:
    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                 PostorderStack, Visited);
    break;
  case Intrinsic::masked_gather:
    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                 PostorderStack, Visited);
    break;
  case Intrinsic::masked_scatter:
    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
                                                 PostorderStack, Visited);
    break;
  default:
    SmallVector<int, 2> OpIndexes;
    if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
      for (int Idx : OpIndexes) {
        appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(Idx),
                                                     PostorderStack, Visited);
      }
    }
    break;
  }
}

// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
    Value *V, PostorderStackTy &PostorderStack,
    DenseSet<Value *> &Visited) const {
  assert(V->getType()->isPtrOrPtrVectorTy());

  // Generic addressing expressions may be hidden in nested constant
  // expressions.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // TODO: Look in non-address parts, like icmp operands.
    if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
      PostorderStack.emplace_back(CE, false);

    return;
  }

  if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
      isAddressExpression(*V, *DL, TTI)) {
    if (Visited.insert(V).second) {
      PostorderStack.emplace_back(V, false);

      Operator *Op = cast<Operator>(V);
      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
          if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
            PostorderStack.emplace_back(CE, false);
        }
      }
    }
  }
}
// Returns all flat address expressions in function F. The elements are ordered
// in postorder.
std::vector<WeakTrackingVH>
InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
  // This function implements a non-recursive postorder traversal of a partial
  // use-def graph of function F.
  PostorderStackTy PostorderStack;
  // The set of visited expressions.
  DenseSet<Value *> Visited;

  auto PushPtrOperand = [&](Value *Ptr) {
    appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack, Visited);
  };

  // Look at operations that may be interesting to accelerate by moving to a
  // known address space. We aim mainly at faster loads and stores, but pure
  // addressing calculations may also benefit.
  for (Instruction &I : instructions(F)) {
    if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      PushPtrOperand(GEP->getPointerOperand());
    } else if (auto *LI = dyn_cast<LoadInst>(&I))
      PushPtrOperand(LI->getPointerOperand());
    else if (auto *SI = dyn_cast<StoreInst>(&I))
      PushPtrOperand(SI->getPointerOperand());
    else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
      PushPtrOperand(RMW->getPointerOperand());
    else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
      PushPtrOperand(CmpX->getPointerOperand());
    else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
      // For memset/memcpy/memmove, any pointer operand can be replaced.
      PushPtrOperand(MI->getRawDest());

      // Handle 2nd operand for memcpy/memmove.
      if (auto *MTI = dyn_cast<MemTransferInst>(MI))
        PushPtrOperand(MTI->getRawSource());
    } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
      collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
    else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
      if (Cmp->getOperand(0)->getType()->isPtrOrPtrVectorTy()) {
        PushPtrOperand(Cmp->getOperand(0));
        PushPtrOperand(Cmp->getOperand(1));
      }
    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
      PushPtrOperand(ASC->getPointerOperand());
    } else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
      if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
        PushPtrOperand(cast<Operator>(I2P->getOperand(0))->getOperand(0));
    } else if (auto *RI = dyn_cast<ReturnInst>(&I)) {
      if (auto *RV = RI->getReturnValue();
          RV && RV->getType()->isPtrOrPtrVectorTy())
        PushPtrOperand(RV);
    }
  }

  std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
  while (!PostorderStack.empty()) {
    Value *TopVal = PostorderStack.back().getPointer();
    // If the operands of the expression on the top are already explored,
    // adds that expression to the resultant postorder.
    if (PostorderStack.back().getInt()) {
      if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
        Postorder.push_back(TopVal);
      PostorderStack.pop_back();
      continue;
    }
    // Otherwise, adds its operands to the stack and explores them.
    PostorderStack.back().setInt(true);
    // Skip values with an assumed address space.
    if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) {
      for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
        appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                     Visited);
      }
    }
  }
  return Postorder;
}
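// To make the traversal above concrete: for the IR in the file header,
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1
// the collected postorder is roughly [%0, %1]; operands come before their
// users, so a later linear walk over the list can always look up an operand's
// inferred address space before visiting the expression that uses it.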
// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns poison in the new address space as a placeholder.
static Value *operandWithNewAddressSpaceOrCreatePoison(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) {
  Value *Operand = OperandUse.get();

  Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAddrSpace);

  if (Constant *C = dyn_cast<Constant>(Operand))
    return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);

  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    return NewOperand;

  Instruction *Inst = cast<Instruction>(OperandUse.getUser());
  auto I = PredicatedAS.find(std::make_pair(Inst, Operand));
  if (I != PredicatedAS.end()) {
    // Insert an addrspacecast on that operand before the user.
    unsigned NewAS = I->second;
    Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAS);
    auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
    NewI->insertBefore(Inst);
    NewI->setDebugLoc(Inst->getDebugLoc());
    return NewI;
  }

  PoisonUsesToFix->push_back(&OperandUse);
  return PoisonValue::get(NewPtrTy);
}

// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace. In that case, this function uses poison as a
// placeholder operand and adds that operand use to PoisonUsesToFix so that the
// caller can fix it later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns a
// Value* instead of an Instruction*.
//
// This may also return nullptr in case the instruction could not be rewritten.
Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
  Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(I->getType(), NewAddrSpace);

  if (I->getOpcode() == Instruction::AddrSpaceCast) {
    Value *Src = I->getOperand(0);
    // Because `I` is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space, according
    // to our algorithm.
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    if (Src->getType() != NewPtrType)
      return new BitCastInst(Src, NewPtrType);
    return Src;
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    // Technically the intrinsic ID is a pointer typed argument, so specially
    // handle calls early.
    assert(II->getIntrinsicID() == Intrinsic::ptrmask);
    Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
        II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
        PredicatedAS, PoisonUsesToFix);
    Value *Rewrite =
        TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
    if (Rewrite) {
      assert(Rewrite != II && "cannot modify this pointer operation in place");
      return Rewrite;
    }

    return nullptr;
  }

  unsigned AS = TTI->getAssumedAddrSpace(I);
  if (AS != UninitializedAddressSpace) {
    // For the assumed address space, insert an `addrspacecast` to make that
    // explicit.
    Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(I->getType(), AS);
    auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
    NewI->insertAfter(I);
    NewI->setDebugLoc(I->getDebugLoc());
    return NewI;
  }

  // Computes the converted pointer operands.
  SmallVector<Value *, 4> NewPointerOperands;
  for (const Use &OperandUse : I->operands()) {
    if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy())
      NewPointerOperands.push_back(nullptr);
    else
      NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreatePoison(
          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
          PoisonUsesToFix));
  }

  switch (I->getOpcode()) {
  case Instruction::BitCast:
    return new BitCastInst(NewPointerOperands[0], NewPtrType);
  case Instruction::PHI: {
    assert(I->getType()->isPtrOrPtrVectorTy());
    PHINode *PHI = cast<PHINode>(I);
    PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
    for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
      unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
      NewPHI->addIncoming(NewPointerOperands[OperandNo],
                          PHI->getIncomingBlock(Index));
    }
    return NewPHI;
  }
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), NewPointerOperands[0],
        SmallVector<Value *, 4>(GEP->indices()));
    NewGEP->setIsInBounds(GEP->isInBounds());
    return NewGEP;
  }
  case Instruction::Select:
    assert(I->getType()->isPtrOrPtrVectorTy());
    return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                              NewPointerOperands[2], "", nullptr, I);
  case Instruction::IntToPtr: {
    assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
    Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
    if (Src->getType() == NewPtrType)
      return Src;

    // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
    // source address space from a generic pointer source, and so need to
    // insert a cast back.
    return CastInst::CreatePointerBitCastOrAddrSpaceCast(Src, NewPtrType);
  }
  default:
    llvm_unreachable("Unexpected opcode");
  }
}
// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
    const TargetTransformInfo *TTI) {
  Type *TargetType =
      CE->getType()->isPtrOrPtrVectorTy()
          ? getPtrOrVecOfPtrsWithNewAS(CE->getType(), NewAddrSpace)
          : CE->getType();

  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    // Because CE is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space according
    // to our algorithm.
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
           NewAddrSpace);
    return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
  }

  if (CE->getOpcode() == Instruction::BitCast) {
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(CE->getOperand(0)))
      return ConstantExpr::getBitCast(cast<Constant>(NewOperand), TargetType);
    return ConstantExpr::getAddrSpaceCast(CE, TargetType);
  }

  if (CE->getOpcode() == Instruction::IntToPtr) {
    assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
    Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    return ConstantExpr::getBitCast(Src, TargetType);
  }

  // Computes the operands of the new constant expression.
  bool IsNew = false;
  SmallVector<Constant *, 4> NewOperands;
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    Constant *Operand = CE->getOperand(Index);
    // If the address space of `Operand` needs to be modified, the new operand
    // with the new address space should already be in ValueWithNewAddrSpace
    // because (1) the constant expressions we consider (i.e. addrspacecast,
    // bitcast, and getelementptr) do not incur cycles in the data flow graph
    // and (2) this function is called on constant expressions in postorder.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
      IsNew = true;
      NewOperands.push_back(cast<Constant>(NewOperand));
      continue;
    }
    if (auto *CExpr = dyn_cast<ConstantExpr>(Operand))
      if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
              CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
        IsNew = true;
        NewOperands.push_back(cast<Constant>(NewOperand));
        continue;
      }
    // Otherwise, reuses the old operand.
    NewOperands.push_back(Operand);
  }

  // If !IsNew, we will replace the Value with itself. However, replaced values
  // are assumed to be wrapped in an addrspacecast later, so drop it now.
  if (!IsNew)
    return nullptr;

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    // Needs to specify the source type while constructing a getelementptr
    // constant expression.
    return CE->getWithOperands(NewOperands, TargetType, /*OnlyIfReduced=*/false,
                               cast<GEPOperator>(CE)->getSourceElementType());
  }

  return CE->getWithOperands(NewOperands, TargetType);
}
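// As an illustrative case of the constant path above: once the addrspacecast
// of @a has been processed in postorder, a flat constant expression such as
//   getelementptr inbounds ([10 x float],
//       ptr addrspacecast (ptr addrspace(3) @a to ptr), i64 0, i64 1)
// can be rebuilt as
//   getelementptr inbounds ([10 x float], ptr addrspace(3) @a, i64 0, i64 1)
// and users that still need the flat type later receive an explicit
// addrspacecast of the rebuilt expression.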
// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix.
Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
    Value *V, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
  // All values in Postorder are flat address expressions.
  assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
         isAddressExpression(*V, *DL, TTI));

  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Value *NewV = cloneInstructionWithNewAddressSpace(
        I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
    if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
      if (NewI->getParent() == nullptr) {
        NewI->insertBefore(I);
        NewI->takeName(I);
        NewI->setDebugLoc(I->getDebugLoc());
      }
    }
    return NewV;
  }

  return cloneConstantExprWithNewAddressSpace(
      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
}

// Defines the join operation on the address space lattice (see the file header
// comments).
unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
                                                   unsigned AS2) const {
  if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
    return FlatAddrSpace;

  if (AS1 == UninitializedAddressSpace)
    return AS2;
  if (AS2 == UninitializedAddressSpace)
    return AS1;

  // The join of two different specific address spaces is flat.
  return (AS1 == AS2) ? AS1 : FlatAddrSpace;
}
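// A small worked example of the join above, assuming FlatAddrSpace == 0 (a
// common choice for a target's generic/flat address space):
//   joinAddressSpaces(UninitializedAddressSpace, 3) == 3  // first operand seen
//   joinAddressSpaces(3, 3)                         == 3  // agreement
//   joinAddressSpaces(3, 1)                         == 0  // conflict -> flat
//   joinAddressSpaces(0, AS)                        == 0  // flat is bottom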
bool InferAddressSpacesImpl::run(Function &F) {
  DL = &F.getDataLayout();

  if (AssumeDefaultIsFlatAddressSpace)
    FlatAddrSpace = 0;

  if (FlatAddrSpace == UninitializedAddressSpace) {
    FlatAddrSpace = TTI->getFlatAddressSpace();
    if (FlatAddrSpace == UninitializedAddressSpace)
      return false;
  }

  // Collects all flat address expressions in postorder.
  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);

  // Runs a data-flow analysis to refine the address spaces of every expression
  // in Postorder.
  ValueToAddrSpaceMapTy InferredAddrSpace;
  PredicatedAddrSpaceMapTy PredicatedAS;
  inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);

  // Changes the address spaces of the flat address expressions that are
  // inferred to point to a specific address space.
  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS,
                                     &F);
}

// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpacesImpl::inferAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    ValueToAddrSpaceMapTy &InferredAddrSpace,
    PredicatedAddrSpaceMapTy &PredicatedAS) const {
  SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
  // Initially, all expressions are in the uninitialized address space.
  for (Value *V : Postorder)
    InferredAddrSpace[V] = UninitializedAddressSpace;

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();

    // Try to update the address space of the stack top according to the
    // address spaces of its operands.
    if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS))
      continue;

    for (Value *User : V->users()) {
      // Skip if User is already in the worklist.
      if (Worklist.count(User))
        continue;

      auto Pos = InferredAddrSpace.find(User);
      // Our algorithm only updates the address spaces of flat address
      // expressions, which are those in InferredAddrSpace.
      if (Pos == InferredAddrSpace.end())
        continue;

      // Function updateAddressSpace moves the address space down a lattice
      // path. Therefore, nothing to do if User is already inferred as flat (the
      // bottom element in the lattice).
      if (Pos->second == FlatAddrSpace)
        continue;

      Worklist.insert(User);
    }
  }
}

unsigned InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &V,
                                                        Value *Opnd) const {
  const Instruction *I = dyn_cast<Instruction>(&V);
  if (!I)
    return UninitializedAddressSpace;

  Opnd = Opnd->stripInBoundsOffsets();
  for (auto &AssumeVH : AC.assumptionsFor(Opnd)) {
    if (!AssumeVH)
      continue;
    CallInst *CI = cast<CallInst>(AssumeVH);
    if (!isValidAssumeForContext(CI, I, DT))
      continue;

    const Value *Ptr;
    unsigned AS;
    std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(CI->getArgOperand(0));
    if (Ptr)
      return AS;
  }

  return UninitializedAddressSpace;
}
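// To illustrate the assume-based path served by getPredicatedAddrSpace above
// (the predicate intrinsic shown is the AMDGPU one; other targets expose their
// own through TTI::getPredicatedAddrSpace):
//   %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p)
//   call void @llvm.assume(i1 %is.shared)
//   %v = load float, ptr %p
// In the region dominated by the assume, the use of %p in the load may be
// rewritten through an addrspacecast to addrspace(3) even though the
// definition of %p stays flat, which is why the result is keyed on the
// (user, operand) pair in PredicatedAddrSpaceMapTy.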
bool InferAddressSpacesImpl::updateAddressSpace(
    const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
    PredicatedAddrSpaceMapTy &PredicatedAS) const {
  assert(InferredAddrSpace.count(&V));

  LLVM_DEBUG(dbgs() << "Updating the address space of\n  " << V << '\n');

  // The new inferred address space equals the join of the address spaces
  // of all its pointer operands.
  unsigned NewAS = UninitializedAddressSpace;

  const Operator &Op = cast<Operator>(V);
  if (Op.getOpcode() == Instruction::Select) {
    Value *Src0 = Op.getOperand(1);
    Value *Src1 = Op.getOperand(2);

    auto I = InferredAddrSpace.find(Src0);
    unsigned Src0AS = (I != InferredAddrSpace.end())
                          ? I->second
                          : Src0->getType()->getPointerAddressSpace();

    auto J = InferredAddrSpace.find(Src1);
    unsigned Src1AS = (J != InferredAddrSpace.end())
                          ? J->second
                          : Src1->getType()->getPointerAddressSpace();

    auto *C0 = dyn_cast<Constant>(Src0);
    auto *C1 = dyn_cast<Constant>(Src1);

    // If one of the inputs is a constant, we may be able to do a constant
    // addrspacecast of it. Defer inferring the address space until the input
    // address space is known.
    if ((C1 && Src0AS == UninitializedAddressSpace) ||
        (C0 && Src1AS == UninitializedAddressSpace))
      return false;

    if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
      NewAS = Src1AS;
    else if (C1 && isSafeToCastConstAddrSpace(C1, Src0AS))
      NewAS = Src0AS;
    else
      NewAS = joinAddressSpaces(Src0AS, Src1AS);
  } else {
    unsigned AS = TTI->getAssumedAddrSpace(&V);
    if (AS != UninitializedAddressSpace) {
      // Use the assumed address space directly.
      NewAS = AS;
    } else {
      // Otherwise, infer the address space from its pointer operands.
      for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
        auto I = InferredAddrSpace.find(PtrOperand);
        unsigned OperandAS;
        if (I == InferredAddrSpace.end()) {
          OperandAS = PtrOperand->getType()->getPointerAddressSpace();
          if (OperandAS == FlatAddrSpace) {
            // Check AC for an assumption dominating V.
            unsigned AS = getPredicatedAddrSpace(V, PtrOperand);
            if (AS != UninitializedAddressSpace) {
              LLVM_DEBUG(dbgs()
                         << "  deduce operand AS from the predicate addrspace "
                         << AS << '\n');
              OperandAS = AS;
              // Record this use with the predicated AS.
              PredicatedAS[std::make_pair(&V, PtrOperand)] = OperandAS;
            }
          }
        } else
          OperandAS = I->second;

        // join(flat, *) = flat. So we can break if NewAS is already flat.
        NewAS = joinAddressSpaces(NewAS, OperandAS);
        if (NewAS == FlatAddrSpace)
          break;
      }
    }
  }

  unsigned OldAS = InferredAddrSpace.lookup(&V);
  assert(OldAS != FlatAddrSpace);
  if (OldAS == NewAS)
    return false;

  // If this value's address space changed, return true so the caller adds its
  // users to the worklist, since their address spaces may change as well.
  LLVM_DEBUG(dbgs() << "  to " << NewAS << '\n');
  InferredAddrSpace[&V] = NewAS;
  return true;
}

/// \returns true if \p U is the pointer operand of a memory instruction with
/// a single pointer operand that can have its address space changed by simply
/// mutating the use to a new value. If the memory instruction is volatile,
/// return true only if the target allows the memory instruction to be volatile
/// in the new address space.
static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
                                             Use &U, unsigned AddrSpace) {
  User *Inst = U.getUser();
  unsigned OpNo = U.getOperandNo();
  bool VolatileIsAllowed = false;
  if (auto *I = dyn_cast<Instruction>(Inst))
    VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);

  if (auto *LI = dyn_cast<LoadInst>(Inst))
    return OpNo == LoadInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !LI->isVolatile());

  if (auto *SI = dyn_cast<StoreInst>(Inst))
    return OpNo == StoreInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !SI->isVolatile());

  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !RMW->isVolatile());

  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !CmpX->isVolatile());

  return false;
}
/// Update memory intrinsic uses that require more complex processing than
/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
                                     Value *NewV) {
  IRBuilder<> B(MI);
  MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
  MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);

  if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
    B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(), MSI->getDestAlign(),
                   false, // isVolatile
                   TBAA, ScopeMD, NoAliasMD);
  } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
    Value *Src = MTI->getRawSource();
    Value *Dest = MTI->getRawDest();

    // Be careful in case this is a self-to-self copy.
    if (Src == OldV)
      Src = NewV;

    if (Dest == OldV)
      Dest = NewV;

    if (isa<MemCpyInlineInst>(MTI)) {
      MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
      B.CreateMemCpyInline(Dest, MTI->getDestAlign(), Src,
                           MTI->getSourceAlign(), MTI->getLength(),
                           false, // isVolatile
                           TBAA, TBAAStruct, ScopeMD, NoAliasMD);
    } else if (isa<MemCpyInst>(MTI)) {
      MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
      B.CreateMemCpy(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
                     MTI->getLength(),
                     false, // isVolatile
                     TBAA, TBAAStruct, ScopeMD, NoAliasMD);
    } else {
      assert(isa<MemMoveInst>(MTI));
      B.CreateMemMove(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
                      MTI->getLength(),
                      false, // isVolatile
                      TBAA, ScopeMD, NoAliasMD);
    }
  } else
    llvm_unreachable("unhandled MemIntrinsic");

  MI->eraseFromParent();
  return true;
}

// Returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
                                                        unsigned NewAS) const {
  assert(NewAS != UninitializedAddressSpace);

  unsigned SrcAS = C->getType()->getPointerAddressSpace();
  if (SrcAS == NewAS || isa<UndefValue>(C))
    return true;

  // Prevent illegal casts between different non-flat address spaces.
  if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
    return false;

  if (isa<ConstantPointerNull>(C))
    return true;

  if (auto *Op = dyn_cast<Operator>(C)) {
    // If we already have a constant addrspacecast, it should be safe to cast it
    // off.
    if (Op->getOpcode() == Instruction::AddrSpaceCast)
      return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)),
                                        NewAS);

    if (Op->getOpcode() == Instruction::IntToPtr &&
        Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
      return true;
  }

  return false;
}
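// A sketch (names illustrative) of what isSafeToCastConstAddrSpace enables in
// the use rewriting below: when one side of a pointer compare is rewritten to
// addrspace(3) and the other side is a null constant,
//   %cmp = icmp eq ptr %p, null
// can become
//   %cmp = icmp eq ptr addrspace(3) %p.shared,
//                   addrspacecast (ptr null to ptr addrspace(3))
// because null, undef and existing addrspacecast constants can be re-expressed
// in the new address space with a constant addrspacecast.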
static Value::use_iterator skipToNextUser(Value::use_iterator I,
                                          Value::use_iterator End) {
  User *CurUser = I->getUser();
  ++I;

  while (I != End && I->getUser() == CurUser)
    ++I;

  return I;
}

bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> PoisonUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);

    // In some degenerate cases (e.g. invalid IR in unreachable code), we may
    // not even infer the value to have its original address space.
    if (NewAddrSpace == UninitializedAddressSpace)
      continue;

    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      Value *New =
          cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
                                        PredicatedAS, &PoisonUsesToFix);
      if (New)
        ValueWithNewAddrSpace[V] = New;
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the poison uses generated by cloneInstructionWithNewAddressSpace.
  for (const Use *PoisonUse : PoisonUsesToFix) {
    User *V = PoisonUse->getUser();
    User *NewV = cast_or_null<User>(ValueWithNewAddrSpace.lookup(V));
    if (!NewV)
      continue;

    unsigned OperandNo = PoisonUse->getOperandNo();
    assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(PoisonUse->get()));
  }

  SmallVector<Instruction *, 16> DeadInstructions;
  ValueToValueMapTy VMap;
  ValueMapper VMapper(VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

  // Replaces the uses of the old address expressions with the new ones.
  for (const WeakTrackingVH &WVH : Postorder) {
    assert(WVH && "value was unexpectedly deleted");
    Value *V = WVH;
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
    if (NewV == nullptr)
      continue;

    LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n  with\n  "
                      << *NewV << '\n');

    if (Constant *C = dyn_cast<Constant>(V)) {
      Constant *Replace =
          ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), C->getType());
      if (C != Replace) {
        LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
                          << ": " << *Replace << '\n');
        SmallVector<User *, 16> WorkList;
        for (User *U : make_early_inc_range(C->users())) {
          if (auto *I = dyn_cast<Instruction>(U)) {
            if (I->getFunction() == F)
              I->replaceUsesOfWith(C, Replace);
          } else {
            WorkList.append(U->user_begin(), U->user_end());
          }
        }
        if (!WorkList.empty()) {
          VMap[C] = Replace;
          DenseSet<User *> Visited{WorkList.begin(), WorkList.end()};
          while (!WorkList.empty()) {
            User *U = WorkList.pop_back_val();
            if (auto *I = dyn_cast<Instruction>(U)) {
              if (I->getFunction() == F)
                VMapper.remapInstruction(*I);
              continue;
            }
            for (User *U2 : U->users())
              if (Visited.insert(U2).second)
                WorkList.push_back(U2);
          }
        }
        V = Replace;
      }
    }

    Value::use_iterator I, E, Next;
    for (I = V->use_begin(), E = V->use_end(); I != E;) {
      Use &U = *I;
      User *CurUser = U.getUser();

      // Some users may see the same pointer operand in multiple operands. Skip
      // to the next instruction.
      I = skipToNextUser(I, E);

      if (isSimplePointerUseValidToReplace(
              *TTI, U, V->getType()->getPointerAddressSpace())) {
        // If V is used as the pointer operand of a compatible memory operation,
        // sets the pointer operand to NewV. This replacement does not change
        // the element type, so the resultant load/store is still valid.
        U.set(NewV);
        continue;
      }

      // Skip if the current user is the new value itself.
      if (CurUser == NewV)
        continue;

      if (auto *CurUserI = dyn_cast<Instruction>(CurUser);
          CurUserI && CurUserI->getFunction() != F)
        continue;

      // Handle more complex cases like intrinsics that need to be remangled.
      if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
        if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
          continue;
      }

      if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
        if (rewriteIntrinsicOperands(II, V, NewV))
          continue;
      }

      if (isa<Instruction>(CurUser)) {
        if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUser)) {
          // If we can infer that both pointers are in the same addrspace,
          // transform e.g.
          //   %cmp = icmp eq float* %p, %q
          // into
          //   %cmp = icmp eq float addrspace(3)* %new_p, %new_q

          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
          int SrcIdx = U.getOperandNo();
          int OtherIdx = (SrcIdx == 0) ? 1 : 0;
          Value *OtherSrc = Cmp->getOperand(OtherIdx);

          if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
            if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
              Cmp->setOperand(OtherIdx, OtherNewV);
              Cmp->setOperand(SrcIdx, NewV);
              continue;
            }
          }

          // Even if the type mismatches, we can cast the constant.
          if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
            if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
              Cmp->setOperand(SrcIdx, NewV);
              Cmp->setOperand(OtherIdx, ConstantExpr::getAddrSpaceCast(
                                            KOtherSrc, NewV->getType()));
              continue;
            }
          }
        }

        if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
          if (ASC->getDestAddressSpace() == NewAS) {
            ASC->replaceAllUsesWith(NewV);
            DeadInstructions.push_back(ASC);
            continue;
          }
        }

        // Otherwise, replaces the use with flat(NewV).
        if (Instruction *VInst = dyn_cast<Instruction>(V)) {
          // Don't create a copy of the original addrspacecast.
          if (U == V && isa<AddrSpaceCastInst>(V))
            continue;

          // Insert the addrspacecast after NewV.
          BasicBlock::iterator InsertPos;
          if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
            InsertPos = std::next(NewVInst->getIterator());
          else
            InsertPos = std::next(VInst->getIterator());

          while (isa<PHINode>(InsertPos))
            ++InsertPos;
          // This instruction may contain multiple uses of V, update them all.
          CurUser->replaceUsesOfWith(
              V, new AddrSpaceCastInst(NewV, V->getType(), "", InsertPos));
        } else {
          CurUser->replaceUsesOfWith(
              V, ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                                V->getType()));
        }
      }
    }

    if (V->use_empty()) {
      if (Instruction *I = dyn_cast<Instruction>(V))
        DeadInstructions.push_back(I);
    }
  }

  for (Instruction *I : DeadInstructions)
    RecursivelyDeleteTriviallyDeadInstructions(I);

  return true;
}

bool InferAddressSpaces::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  return InferAddressSpacesImpl(
             getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT,
             &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
             FlatAddrSpace)
      .run(F);
}

FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
  return new InferAddressSpaces(AddressSpace);
}

InferAddressSpacesPass::InferAddressSpacesPass()
    : FlatAddrSpace(UninitializedAddressSpace) {}
InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
    : FlatAddrSpace(AddressSpace) {}

PreservedAnalyses InferAddressSpacesPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {
  bool Changed =
      InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(F),
                             AM.getCachedResult<DominatorTreeAnalysis>(F),
                             &AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
          .run(F);
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<DominatorTreeAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}