Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp
35266 views
1
//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements AMDGPU TargetInfo objects.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "AMDGPU.h"
14
#include "clang/Basic/Builtins.h"
15
#include "clang/Basic/CodeGenOptions.h"
16
#include "clang/Basic/Diagnostic.h"
17
#include "clang/Basic/LangOptions.h"
18
#include "clang/Basic/MacroBuilder.h"
19
#include "clang/Basic/TargetBuiltins.h"
20
#include "llvm/ADT/SmallString.h"
21
using namespace clang;
22
using namespace clang::targets;
23
24
namespace clang {
25
namespace targets {
26
27
// If you edit the description strings, make sure you update
// getPointerWidthV().
29
30
// Data layout description passed to resetDataLayout() by the AMDGPUTargetInfo
// constructor when the target triple is not AMDGCN.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
33
34
// Data layout description passed to resetDataLayout() by the AMDGPUTargetInfo
// constructor when the target triple is AMDGCN. The literal is split at
// component boundaries only; the concatenated value is what matters.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
    "-ni:7:8:9";
40
41
// Language address space -> AMDGPU target address space table in which the
// Default language address space maps to FLAT_ADDRESS. Selected by
// setAddressSpaceMap(/*DefaultIsPrivate=*/false). Each entry's trailing
// comment names the language address space it corresponds to; the entry order
// must be kept as is.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    llvm::AMDGPUAS::FLAT_ADDRESS,     // Default
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_host
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // sycl_local
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // sycl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
    llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
};
63
64
// Language address space -> AMDGPU target address space table in which the
// Default language address space maps to PRIVATE_ADDRESS. Selected by
// setAddressSpaceMap(/*DefaultIsPrivate=*/true). Each entry's trailing
// comment names the language address space it corresponds to; the entry order
// must be kept as is.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // Default
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
    // SYCL address space values for this map are dummy
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global_device
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global_host
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_local
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
    llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
};
88
} // namespace targets
89
} // namespace clang
90
91
// Table of AMDGPU builtins, generated by expanding BuiltinsAMDGPU.def with
// the two macros below. BUILTIN entries carry no required feature (nullptr);
// TARGET_BUILTIN entries record the FEATURE string they were declared with.
// The continuation backslash must be immediately followed by the macro body.
static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
98
99
// Register name table exposed through getGCCRegNames(). Contents, in order:
// v0..v255, s0..s127, the named registers (exec, vcc, scc, m0, flat_scratch
// and their _lo/_hi halves), then a0..a255.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
175
176
/// Returns a non-owning view over the static GCCRegNames table.
ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}
179
180
bool AMDGPUTargetInfo::initFeatureMap(
181
llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
182
const std::vector<std::string> &FeatureVec) const {
183
184
using namespace llvm::AMDGPU;
185
fillAMDGPUFeatureMap(CPU, getTriple(), Features);
186
if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
187
return false;
188
189
// TODO: Should move this logic into TargetParser
190
auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
191
switch (HasError.first) {
192
default:
193
break;
194
case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
195
Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
196
return false;
197
case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
198
Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
199
return false;
200
}
201
202
return true;
203
}
204
205
void AMDGPUTargetInfo::fillValidCPUList(
206
SmallVectorImpl<StringRef> &Values) const {
207
if (isAMDGCN(getTriple()))
208
llvm::AMDGPU::fillValidArchListAMDGCN(Values);
209
else
210
llvm::AMDGPU::fillValidArchListR600(Values);
211
}
212
213
void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
214
AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
215
}
216
217
/// Constructs the target description for \p Triple.
///
/// GPUKind is parsed from Opts.CPU and GPUFeatures is looked up from it, in
/// both cases with the AMDGCN or R600 helper depending on the triple. The body
/// then sets the data layout, the address space map, floating-point type
/// support, the wavefront size, pointer/integer type widths, atomic widths,
/// CU mode and the read-only feature set.
AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                                   const TargetOptions &Opts)
    : TargetInfo(Triple),
      GPUKind(isAMDGCN(Triple) ?
              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
              llvm::AMDGPU::parseArchR600(Opts.CPU)),
      GPUFeatures(isAMDGCN(Triple) ?
                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                        : DataLayoutStringR600);

  // The default address space is private on Mesa3D and on any non-AMDGCN
  // triple; adjust() may re-select the map once language options are known.
  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                     !isAMDGCN(Triple));
  UseAddrSpaceMapMangling = true;

  if (isAMDGCN(Triple)) {
    // __bf16 is always available as a load/store only type on AMDGCN.
    BFloat16Width = BFloat16Align = 16;
    BFloat16Format = &llvm::APFloat::BFloat();
  }

  HasLegalHalfType = true;
  HasFloat16 = true;
  // Wave32 when the architecture attributes say so, otherwise wave64.
  WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
  AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;

  // Set pointer width and alignment for the generic address space.
  PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
  if (getMaxPointerWidth() == 64) {
    LongWidth = LongAlign = 64;
    SizeType = UnsignedLong;
    PtrDiffType = SignedLong;
    IntPtrType = SignedLong;
  }

  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
  // CU mode is on unless the architecture attributes include FEATURE_WGP.
  CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
  for (auto F : {"image-insts", "gws"})
    ReadOnlyFeatures.insert(F);
  HalfArgsAndReturns = true;
}
259
260
/// Runs the base-class adjustment and then re-selects the address space map:
/// the default address space is private for OpenCL and for any non-AMDGCN
/// triple.
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
  TargetInfo::adjust(Diags, Opts);
  // ToDo: There are still a few places using default address space as private
  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
  // can be removed from the following line.
  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
                     !isAMDGCN(getTriple()));
}
268
269
/// Returns a view over the BuiltinInfo table. The element count is
/// LastTSBuiltin - FirstTSBuiltin.
ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
  return llvm::ArrayRef(BuiltinInfo,
                        clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
}
273
274
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
275
MacroBuilder &Builder) const {
276
Builder.defineMacro("__AMD__");
277
Builder.defineMacro("__AMDGPU__");
278
279
if (isAMDGCN(getTriple()))
280
Builder.defineMacro("__AMDGCN__");
281
else
282
Builder.defineMacro("__R600__");
283
284
// Legacy HIP host code relies on these default attributes to be defined.
285
bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
286
if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
287
return;
288
289
llvm::SmallString<16> CanonName =
290
(isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
291
: getArchNameR600(GPUKind));
292
293
// Sanitize the name of generic targets.
294
// e.g. gfx10-1-generic -> gfx10_1_generic
295
if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
296
GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
297
std::replace(CanonName.begin(), CanonName.end(), '-', '_');
298
}
299
300
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
301
// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
302
if (isAMDGCN(getTriple()) && !IsHIPHost) {
303
assert(StringRef(CanonName).starts_with("gfx") &&
304
"Invalid amdgcn canonical name");
305
StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
306
Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
307
Twine("__"));
308
Builder.defineMacro("__amdgcn_processor__",
309
Twine("\"") + Twine(CanonName) + Twine("\""));
310
Builder.defineMacro("__amdgcn_target_id__",
311
Twine("\"") + Twine(*getTargetID()) + Twine("\""));
312
for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
313
auto Loc = OffloadArchFeatures.find(F);
314
if (Loc != OffloadArchFeatures.end()) {
315
std::string NewF = F.str();
316
std::replace(NewF.begin(), NewF.end(), '-', '_');
317
Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
318
Twine("__"),
319
Loc->second ? "1" : "0");
320
}
321
}
322
}
323
324
if (AllowAMDGPUUnsafeFPAtomics)
325
Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
326
327
// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
328
// removed in the near future.
329
if (hasFMAF())
330
Builder.defineMacro("__HAS_FMAF__");
331
if (hasFastFMAF())
332
Builder.defineMacro("FP_FAST_FMAF");
333
if (hasLDEXPF())
334
Builder.defineMacro("__HAS_LDEXPF__");
335
if (hasFP64())
336
Builder.defineMacro("__HAS_FP64__");
337
if (hasFastFMA())
338
Builder.defineMacro("FP_FAST_FMA");
339
340
Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
341
// ToDo: deprecate this macro for naming consistency.
342
Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
343
Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
344
}
345
346
void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
347
assert(HalfFormat == Aux->HalfFormat);
348
assert(FloatFormat == Aux->FloatFormat);
349
assert(DoubleFormat == Aux->DoubleFormat);
350
351
// On x86_64 long double is 80-bit extended precision format, which is
352
// not supported by AMDGPU. 128-bit floating point format is also not
353
// supported by AMDGPU. Therefore keep its own format for these two types.
354
auto SaveLongDoubleFormat = LongDoubleFormat;
355
auto SaveFloat128Format = Float128Format;
356
auto SaveLongDoubleWidth = LongDoubleWidth;
357
auto SaveLongDoubleAlign = LongDoubleAlign;
358
copyAuxTarget(Aux);
359
LongDoubleFormat = SaveLongDoubleFormat;
360
Float128Format = SaveFloat128Format;
361
LongDoubleWidth = SaveLongDoubleWidth;
362
LongDoubleAlign = SaveLongDoubleAlign;
363
// For certain builtin types support on the host target, claim they are
364
// support to pass the compilation of the host code during the device-side
365
// compilation.
366
// FIXME: As the side effect, we also accept `__float128` uses in the device
367
// code. To rejct these builtin types supported in the host target but not in
368
// the device target, one approach would support `device_builtin` attribute
369
// so that we could tell the device builtin types from the host ones. The
370
// also solves the different representations of the same builtin type, such
371
// as `size_t` in the MSVC environment.
372
if (Aux->hasFloat128Type()) {
373
HasFloat128 = true;
374
Float128Format = DoubleFormat;
375
}
376
}
377
378