GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Basic/Targets/NVPTX.cpp
//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsNVPTX.def"
};
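// Editor's note (illustrative, not part of the upstream source): the X-macro
// include above turns each BUILTIN, LIBBUILTIN, and TARGET_BUILTIN entry in
// BuiltinsNVPTX.def into one Builtin::Info initializer, so BuiltinInfo ends up
// holding one record per NVPTX builtin (name, type signature, attributes, and
// any required target features).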

const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }
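  // Editor's note (illustrative, not from upstream): a feature written as
  // "+ptx72" passes the starts_with("+ptx") check, getAsInteger parses the
  // remaining "72", and PTXVersion becomes 72; features that do not match the
  // "+ptxNN" pattern are skipped, leaving the default of 32.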

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = OffloadArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

  if (TargetPointerWidth == 32)
    resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else
    resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  // TODO: nvcc emits a warning when using long double on device; we should
  // do the same.
}
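// Editor's note (illustrative, not from upstream): when compiling device code
// alongside, say, an x86_64-linux host triple, the property-copying block
// above makes the device-side widths of int, long, size_t, wchar_t, and the
// atomic limits match the host's, so types agree across the host/device
// boundary.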

ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}

bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Cases("ptx", "nvptx", true)
      .Default(false);
}

void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == OffloadArch::UNUSED && !HostTarget)
    return;

  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
    std::string CUDAArchCode = [this] {
      switch (GPU) {
      case OffloadArch::GFX600:
      case OffloadArch::GFX601:
      case OffloadArch::GFX602:
      case OffloadArch::GFX700:
      case OffloadArch::GFX701:
      case OffloadArch::GFX702:
      case OffloadArch::GFX703:
      case OffloadArch::GFX704:
      case OffloadArch::GFX705:
      case OffloadArch::GFX801:
      case OffloadArch::GFX802:
      case OffloadArch::GFX803:
      case OffloadArch::GFX805:
      case OffloadArch::GFX810:
      case OffloadArch::GFX9_GENERIC:
      case OffloadArch::GFX900:
      case OffloadArch::GFX902:
      case OffloadArch::GFX904:
      case OffloadArch::GFX906:
      case OffloadArch::GFX908:
      case OffloadArch::GFX909:
      case OffloadArch::GFX90a:
      case OffloadArch::GFX90c:
      case OffloadArch::GFX940:
      case OffloadArch::GFX941:
      case OffloadArch::GFX942:
      case OffloadArch::GFX10_1_GENERIC:
      case OffloadArch::GFX1010:
      case OffloadArch::GFX1011:
      case OffloadArch::GFX1012:
      case OffloadArch::GFX1013:
      case OffloadArch::GFX10_3_GENERIC:
      case OffloadArch::GFX1030:
      case OffloadArch::GFX1031:
      case OffloadArch::GFX1032:
      case OffloadArch::GFX1033:
      case OffloadArch::GFX1034:
      case OffloadArch::GFX1035:
      case OffloadArch::GFX1036:
      case OffloadArch::GFX11_GENERIC:
      case OffloadArch::GFX1100:
      case OffloadArch::GFX1101:
      case OffloadArch::GFX1102:
      case OffloadArch::GFX1103:
      case OffloadArch::GFX1150:
      case OffloadArch::GFX1151:
      case OffloadArch::GFX1152:
      case OffloadArch::GFX12_GENERIC:
      case OffloadArch::GFX1200:
      case OffloadArch::GFX1201:
      case OffloadArch::AMDGCNSPIRV:
      case OffloadArch::Generic:
      case OffloadArch::LAST:
        break;
      case OffloadArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      case OffloadArch::UNUSED:
      case OffloadArch::SM_20:
        return "200";
      case OffloadArch::SM_21:
        return "210";
      case OffloadArch::SM_30:
        return "300";
      case OffloadArch::SM_32_:
        return "320";
      case OffloadArch::SM_35:
        return "350";
      case OffloadArch::SM_37:
        return "370";
      case OffloadArch::SM_50:
        return "500";
      case OffloadArch::SM_52:
        return "520";
      case OffloadArch::SM_53:
        return "530";
      case OffloadArch::SM_60:
        return "600";
      case OffloadArch::SM_61:
        return "610";
      case OffloadArch::SM_62:
        return "620";
      case OffloadArch::SM_70:
        return "700";
      case OffloadArch::SM_72:
        return "720";
      case OffloadArch::SM_75:
        return "750";
      case OffloadArch::SM_80:
        return "800";
      case OffloadArch::SM_86:
        return "860";
      case OffloadArch::SM_87:
        return "870";
      case OffloadArch::SM_89:
        return "890";
      case OffloadArch::SM_90:
      case OffloadArch::SM_90a:
        return "900";
      }
      llvm_unreachable("unhandled OffloadArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
    if (GPU == OffloadArch::SM_90a)
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
  }
}
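// Editor's note (illustrative, not from upstream): for a device-side CUDA
// compilation targeting, e.g., sm_70, the switch above returns "700" and the
// preprocessor sees __CUDA_ARCH__ == 700; only sm_90a additionally gets
// __CUDA_ARCH_FEAT_SM90_ALL defined to 1.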

ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
  return llvm::ArrayRef(BuiltinInfo,
                        clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin);
}