Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
35294 views
1
//===-- AMDGPUAlwaysInlinePass.cpp - Always Inline Functions --------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This pass marks all internal functions as always_inline and creates
11
/// duplicates of all other functions and marks the duplicates as always_inline.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "AMDGPU.h"
16
#include "AMDGPUTargetMachine.h"
17
#include "Utils/AMDGPUBaseInfo.h"
18
#include "llvm/CodeGen/CommandFlags.h"
19
#include "llvm/IR/Module.h"
20
#include "llvm/Pass.h"
21
#include "llvm/Support/CommandLine.h"
22
23
using namespace llvm;
24
25
namespace {
26
27
// Hidden stress-testing switch. When set, alwaysInlineImpl() marks every
// defined function that has uses as noinline, except functions it has already
// decided must be always_inline.
static cl::opt<bool> StressCalls("amdgpu-stress-function-calls",
                                 cl::desc("Force all functions to be noinline"),
                                 cl::init(false), cl::Hidden);
32
33
class AMDGPUAlwaysInline : public ModulePass {
34
bool GlobalOpt;
35
36
public:
37
static char ID;
38
39
AMDGPUAlwaysInline(bool GlobalOpt = false) :
40
ModulePass(ID), GlobalOpt(GlobalOpt) { }
41
bool runOnModule(Module &M) override;
42
43
void getAnalysisUsage(AnalysisUsage &AU) const override {
44
AU.setPreservesAll();
45
}
46
};
47
48
} // End anonymous namespace
49
50
INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
51
"AMDGPU Inline All Functions", false, false)
52
53
char AMDGPUAlwaysInline::ID = 0;
54
55
static void
56
recursivelyVisitUsers(GlobalValue &GV,
57
SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
58
SmallVector<User *, 16> Stack(GV.users());
59
60
SmallPtrSet<const Value *, 8> Visited;
61
62
while (!Stack.empty()) {
63
User *U = Stack.pop_back_val();
64
if (!Visited.insert(U).second)
65
continue;
66
67
if (Instruction *I = dyn_cast<Instruction>(U)) {
68
Function *F = I->getParent()->getParent();
69
if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
70
// FIXME: This is a horrible hack. We should always respect noinline,
71
// and just let us hit the error when we can't handle this.
72
//
73
// Unfortunately, clang adds noinline to all functions at -O0. We have
74
// to override this here until that's fixed.
75
F->removeFnAttr(Attribute::NoInline);
76
77
FuncsToAlwaysInline.insert(F);
78
Stack.push_back(F);
79
}
80
81
// No need to look at further users, but we do need to inline any callers.
82
continue;
83
}
84
85
append_range(Stack, U->users());
86
}
87
}
88
89
/// Shared implementation of the always-inline pass for both pass managers.
///
/// Steps, in order:
///  1. Redirect uses of function aliases to the aliased function (on amdgcn
///     only for aliases with internal linkage); erase those aliases when
///     \p GlobalOpt is set.
///  2. Collect functions that reach a region-address global, or a
///     local-address global while module LDS lowering is disabled.
///  3. If function calls are disabled, or the stress option is set, classify
///     every defined function that has uses.
///  4. Apply the always_inline / noinline attributes.
///
/// \returns true if an alias was rewritten or any function was selected for an
/// attribute change.
static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
  bool Changed = false;
  std::vector<GlobalAlias *> AliasesToRemove;
  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

  Triple TT(M.getTargetTriple());
  const bool IsAMDGCN = TT.getArch() == Triple::amdgcn;

  for (GlobalAlias &Alias : M.aliases()) {
    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
    auto *Aliasee = dyn_cast<Function>(Alias.getAliasee());
    if (!Aliasee)
      continue;

    // On amdgcn only aliases with internal linkage are rewritten.
    if (IsAMDGCN && Alias.getLinkage() != GlobalValue::InternalLinkage)
      continue;

    Changed = true;
    Alias.replaceAllUsesWith(Aliasee);
    AliasesToRemove.push_back(&Alias);
  }

  // Erasure is deferred until after the walk over M.aliases() has finished.
  if (GlobalOpt)
    for (GlobalAlias *Alias : AliasesToRemove)
      Alias->eraseFromParent();

  // Always force inlining of any function that uses an LDS global address.
  // This is something of a workaround because we don't have a way of
  // supporting LDS objects defined in functions. LDS is always allocated by a
  // kernel, and it is difficult to manage LDS usage if a function may be used
  // by multiple kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.
  for (GlobalVariable &GV : M.globals()) {
    // TODO: Region address
    const unsigned AS = GV.getAddressSpace();
    const bool IsRegion = AS == AMDGPUAS::REGION_ADDRESS;
    const bool IsUnloweredLDS = AS == AMDGPUAS::LOCAL_ADDRESS &&
                                !AMDGPUTargetMachine::EnableLowerModuleLDS;
    if (IsRegion || IsUnloweredLDS)
      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  const bool Stress = StressCalls;
  if (!AMDGPUTargetMachine::EnableFunctionCalls || Stress) {
    // A function already carrying the opposite attribute is left untouched.
    const auto IncompatAttr =
        Stress ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (F.isDeclaration() || F.use_empty() || F.hasFnAttribute(IncompatAttr))
        continue;

      if (!Stress) {
        FuncsToAlwaysInline.insert(&F);
      } else if (!FuncsToAlwaysInline.count(&F)) {
        // Functions already selected for always_inline keep that decision.
        FuncsToNoInline.insert(&F);
      }
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  if (Changed)
    return true;
  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}
160
161
/// Legacy pass-manager entry point: delegates to alwaysInlineImpl() with the
/// GlobalOpt setting captured at construction and reports whether \p M
/// changed.
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  const bool Changed = alwaysInlineImpl(M, GlobalOpt);
  return Changed;
}
164
165
/// Factory for the legacy always-inline pass. Returns a newly allocated pass
/// configured with \p GlobalOpt.
ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
  ModulePass *NewPass = new AMDGPUAlwaysInline(GlobalOpt);
  return NewPass;
}
168
169
/// New pass-manager entry point. Runs alwaysInlineImpl() on \p M; reports all
/// analyses preserved when nothing changed, and none preserved otherwise.
/// \p AM is not used.
PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
                                              ModuleAnalysisManager &AM) {
  if (!alwaysInlineImpl(M, GlobalOpt))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
174
175