Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
35294 views
//===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file9/// This pass marks all internal functions as always_inline and creates10/// duplicates of all other functions and marks the duplicates as always_inline.11//12//===----------------------------------------------------------------------===//1314#include "AMDGPU.h"15#include "AMDGPUTargetMachine.h"16#include "Utils/AMDGPUBaseInfo.h"17#include "llvm/CodeGen/CommandFlags.h"18#include "llvm/IR/Module.h"19#include "llvm/Pass.h"20#include "llvm/Support/CommandLine.h"2122using namespace llvm;2324namespace {2526static cl::opt<bool> StressCalls(27"amdgpu-stress-function-calls",28cl::Hidden,29cl::desc("Force all functions to be noinline"),30cl::init(false));3132class AMDGPUAlwaysInline : public ModulePass {33bool GlobalOpt;3435public:36static char ID;3738AMDGPUAlwaysInline(bool GlobalOpt = false) :39ModulePass(ID), GlobalOpt(GlobalOpt) { }40bool runOnModule(Module &M) override;4142void getAnalysisUsage(AnalysisUsage &AU) const override {43AU.setPreservesAll();44}45};4647} // End anonymous namespace4849INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",50"AMDGPU Inline All Functions", false, false)5152char AMDGPUAlwaysInline::ID = 0;5354static void55recursivelyVisitUsers(GlobalValue &GV,56SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {57SmallVector<User *, 16> Stack(GV.users());5859SmallPtrSet<const Value *, 8> Visited;6061while (!Stack.empty()) {62User *U = Stack.pop_back_val();63if (!Visited.insert(U).second)64continue;6566if (Instruction *I = dyn_cast<Instruction>(U)) {67Function *F = I->getParent()->getParent();68if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {69// FIXME: This is a horrible hack. We should always respect noinline,70// and just let us hit the error when we can't handle this.71//72// Unfortunately, clang adds noinline to all functions at -O0. We have73// to override this here until that's fixed.74F->removeFnAttr(Attribute::NoInline);7576FuncsToAlwaysInline.insert(F);77Stack.push_back(F);78}7980// No need to look at further users, but we do need to inline any callers.81continue;82}8384append_range(Stack, U->users());85}86}8788static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {89std::vector<GlobalAlias*> AliasesToRemove;9091bool Changed = false;92SmallPtrSet<Function *, 8> FuncsToAlwaysInline;93SmallPtrSet<Function *, 8> FuncsToNoInline;94Triple TT(M.getTargetTriple());9596for (GlobalAlias &A : M.aliases()) {97if (Function* F = dyn_cast<Function>(A.getAliasee())) {98if (TT.getArch() == Triple::amdgcn &&99A.getLinkage() != GlobalValue::InternalLinkage)100continue;101Changed = true;102A.replaceAllUsesWith(F);103AliasesToRemove.push_back(&A);104}105106// FIXME: If the aliasee isn't a function, it's some kind of constant expr107// cast that won't be inlined through.108}109110if (GlobalOpt) {111for (GlobalAlias* A : AliasesToRemove) {112A->eraseFromParent();113}114}115116// Always force inlining of any function that uses an LDS global address. This117// is something of a workaround because we don't have a way of supporting LDS118// objects defined in functions. LDS is always allocated by a kernel, and it119// is difficult to manage LDS usage if a function may be used by multiple120// kernels.121//122// OpenCL doesn't allow declaring LDS in non-kernels, so in practice this123// should only appear when IPO passes manages to move LDs defined in a kernel124// into a single user function.125126for (GlobalVariable &GV : M.globals()) {127// TODO: Region address128unsigned AS = GV.getAddressSpace();129if ((AS == AMDGPUAS::REGION_ADDRESS) ||130(AS == AMDGPUAS::LOCAL_ADDRESS &&131(!AMDGPUTargetMachine::EnableLowerModuleLDS)))132recursivelyVisitUsers(GV, FuncsToAlwaysInline);133}134135if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {136auto IncompatAttr137= StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;138139for (Function &F : M) {140if (!F.isDeclaration() && !F.use_empty() &&141!F.hasFnAttribute(IncompatAttr)) {142if (StressCalls) {143if (!FuncsToAlwaysInline.count(&F))144FuncsToNoInline.insert(&F);145} else146FuncsToAlwaysInline.insert(&F);147}148}149}150151for (Function *F : FuncsToAlwaysInline)152F->addFnAttr(Attribute::AlwaysInline);153154for (Function *F : FuncsToNoInline)155F->addFnAttr(Attribute::NoInline);156157return Changed || !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();158}159160bool AMDGPUAlwaysInline::runOnModule(Module &M) {161return alwaysInlineImpl(M, GlobalOpt);162}163164ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {165return new AMDGPUAlwaysInline(GlobalOpt);166}167168PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,169ModuleAnalysisManager &AM) {170const bool Changed = alwaysInlineImpl(M, GlobalOpt);171return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();172}173174175