Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
35294 views
1
//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// AMDGPU HSA Metadata Streamer.
11
///
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "AMDGPUHSAMetadataStreamer.h"
16
#include "AMDGPU.h"
17
#include "GCNSubtarget.h"
18
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
19
#include "SIMachineFunctionInfo.h"
20
#include "SIProgramInfo.h"
21
#include "llvm/IR/Module.h"
22
#include "llvm/MC/MCContext.h"
23
#include "llvm/MC/MCExpr.h"
24
using namespace llvm;
25
26
static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
27
const DataLayout &DL) {
28
Type *Ty = Arg.getType();
29
MaybeAlign ArgAlign;
30
if (Arg.hasByRefAttr()) {
31
Ty = Arg.getParamByRefType();
32
ArgAlign = Arg.getParamAlign();
33
}
34
35
if (!ArgAlign)
36
ArgAlign = DL.getABITypeAlign(Ty);
37
38
return std::pair(Ty, *ArgAlign);
39
}
40
41
namespace llvm {
42
43
static cl::opt<bool> DumpHSAMetadata(
44
"amdgpu-dump-hsa-metadata",
45
cl::desc("Dump AMDGPU HSA Metadata"));
46
static cl::opt<bool> VerifyHSAMetadata(
47
"amdgpu-verify-hsa-metadata",
48
cl::desc("Verify AMDGPU HSA Metadata"));
49
50
namespace AMDGPU::HSAMD {
51
52
//===----------------------------------------------------------------------===//
53
// HSAMetadataStreamerV4
54
//===----------------------------------------------------------------------===//
55
56
void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
57
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
58
}
59
60
void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
61
errs() << "AMDGPU HSA Metadata Parser Test: ";
62
63
msgpack::Document FromHSAMetadataString;
64
65
if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
66
errs() << "FAIL\n";
67
return;
68
}
69
70
std::string ToHSAMetadataString;
71
raw_string_ostream StrOS(ToHSAMetadataString);
72
FromHSAMetadataString.toYAML(StrOS);
73
74
errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
75
if (HSAMetadataString != ToHSAMetadataString) {
76
errs() << "Original input: " << HSAMetadataString << '\n'
77
<< "Produced output: " << StrOS.str() << '\n';
78
}
79
}
80
81
std::optional<StringRef>
82
MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
83
return StringSwitch<std::optional<StringRef>>(AccQual)
84
.Case("read_only", StringRef("read_only"))
85
.Case("write_only", StringRef("write_only"))
86
.Case("read_write", StringRef("read_write"))
87
.Default(std::nullopt);
88
}
89
90
std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
91
unsigned AddressSpace) const {
92
switch (AddressSpace) {
93
case AMDGPUAS::PRIVATE_ADDRESS:
94
return StringRef("private");
95
case AMDGPUAS::GLOBAL_ADDRESS:
96
return StringRef("global");
97
case AMDGPUAS::CONSTANT_ADDRESS:
98
return StringRef("constant");
99
case AMDGPUAS::LOCAL_ADDRESS:
100
return StringRef("local");
101
case AMDGPUAS::FLAT_ADDRESS:
102
return StringRef("generic");
103
case AMDGPUAS::REGION_ADDRESS:
104
return StringRef("region");
105
default:
106
return std::nullopt;
107
}
108
}
109
110
StringRef
111
MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
112
StringRef BaseTypeName) const {
113
if (TypeQual.contains("pipe"))
114
return "pipe";
115
116
return StringSwitch<StringRef>(BaseTypeName)
117
.Case("image1d_t", "image")
118
.Case("image1d_array_t", "image")
119
.Case("image1d_buffer_t", "image")
120
.Case("image2d_t", "image")
121
.Case("image2d_array_t", "image")
122
.Case("image2d_array_depth_t", "image")
123
.Case("image2d_array_msaa_t", "image")
124
.Case("image2d_array_msaa_depth_t", "image")
125
.Case("image2d_depth_t", "image")
126
.Case("image2d_msaa_t", "image")
127
.Case("image2d_msaa_depth_t", "image")
128
.Case("image3d_t", "image")
129
.Case("sampler_t", "sampler")
130
.Case("queue_t", "queue")
131
.Default(isa<PointerType>(Ty)
132
? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
133
? "dynamic_shared_pointer"
134
: "global_buffer")
135
: "by_value");
136
}
137
138
std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
139
bool Signed) const {
140
switch (Ty->getTypeID()) {
141
case Type::IntegerTyID: {
142
if (!Signed)
143
return (Twine('u') + getTypeName(Ty, true)).str();
144
145
auto BitWidth = Ty->getIntegerBitWidth();
146
switch (BitWidth) {
147
case 8:
148
return "char";
149
case 16:
150
return "short";
151
case 32:
152
return "int";
153
case 64:
154
return "long";
155
default:
156
return (Twine('i') + Twine(BitWidth)).str();
157
}
158
}
159
case Type::HalfTyID:
160
return "half";
161
case Type::FloatTyID:
162
return "float";
163
case Type::DoubleTyID:
164
return "double";
165
case Type::FixedVectorTyID: {
166
auto VecTy = cast<FixedVectorType>(Ty);
167
auto ElTy = VecTy->getElementType();
168
auto NumElements = VecTy->getNumElements();
169
return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
170
}
171
default:
172
return "unknown";
173
}
174
}
175
176
msgpack::ArrayDocNode
177
MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
178
auto Dims = HSAMetadataDoc->getArrayNode();
179
if (Node->getNumOperands() != 3)
180
return Dims;
181
182
for (auto &Op : Node->operands())
183
Dims.push_back(Dims.getDocument()->getNode(
184
uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
185
return Dims;
186
}
187
188
void MetadataStreamerMsgPackV4::emitVersion() {
189
auto Version = HSAMetadataDoc->getArrayNode();
190
Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
191
Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
192
getRootMetadata("amdhsa.version") = Version;
193
}
194
195
void MetadataStreamerMsgPackV4::emitTargetID(
196
const IsaInfo::AMDGPUTargetID &TargetID) {
197
getRootMetadata("amdhsa.target") =
198
HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
199
}
200
201
void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
202
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
203
if (!Node)
204
return;
205
206
auto Printf = HSAMetadataDoc->getArrayNode();
207
for (auto *Op : Node->operands())
208
if (Op->getNumOperands())
209
Printf.push_back(Printf.getDocument()->getNode(
210
cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
211
getRootMetadata("amdhsa.printf") = Printf;
212
}
213
214
void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
215
msgpack::MapDocNode Kern) {
216
// TODO: What about other languages?
217
auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
218
if (!Node || !Node->getNumOperands())
219
return;
220
auto Op0 = Node->getOperand(0);
221
if (Op0->getNumOperands() <= 1)
222
return;
223
224
Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
225
auto LanguageVersion = Kern.getDocument()->getArrayNode();
226
LanguageVersion.push_back(Kern.getDocument()->getNode(
227
mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
228
LanguageVersion.push_back(Kern.getDocument()->getNode(
229
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
230
Kern[".language_version"] = LanguageVersion;
231
}
232
233
void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
234
msgpack::MapDocNode Kern) {
235
236
if (auto Node = Func.getMetadata("reqd_work_group_size"))
237
Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
238
if (auto Node = Func.getMetadata("work_group_size_hint"))
239
Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
240
if (auto Node = Func.getMetadata("vec_type_hint")) {
241
Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
242
getTypeName(
243
cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
244
mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
245
/*Copy=*/true);
246
}
247
if (Func.hasFnAttribute("runtime-handle")) {
248
Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
249
Func.getFnAttribute("runtime-handle").getValueAsString().str(),
250
/*Copy=*/true);
251
}
252
if (Func.hasFnAttribute("device-init"))
253
Kern[".kind"] = Kern.getDocument()->getNode("init");
254
else if (Func.hasFnAttribute("device-fini"))
255
Kern[".kind"] = Kern.getDocument()->getNode("fini");
256
}
257
258
void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
259
msgpack::MapDocNode Kern) {
260
auto &Func = MF.getFunction();
261
unsigned Offset = 0;
262
auto Args = HSAMetadataDoc->getArrayNode();
263
for (auto &Arg : Func.args())
264
emitKernelArg(Arg, Offset, Args);
265
266
emitHiddenKernelArgs(MF, Offset, Args);
267
268
Kern[".args"] = Args;
269
}
270
271
void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
272
unsigned &Offset,
273
msgpack::ArrayDocNode Args) {
274
auto Func = Arg.getParent();
275
auto ArgNo = Arg.getArgNo();
276
const MDNode *Node;
277
278
StringRef Name;
279
Node = Func->getMetadata("kernel_arg_name");
280
if (Node && ArgNo < Node->getNumOperands())
281
Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
282
else if (Arg.hasName())
283
Name = Arg.getName();
284
285
StringRef TypeName;
286
Node = Func->getMetadata("kernel_arg_type");
287
if (Node && ArgNo < Node->getNumOperands())
288
TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
289
290
StringRef BaseTypeName;
291
Node = Func->getMetadata("kernel_arg_base_type");
292
if (Node && ArgNo < Node->getNumOperands())
293
BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
294
295
StringRef ActAccQual;
296
// Do we really need NoAlias check here?
297
if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
298
if (Arg.onlyReadsMemory())
299
ActAccQual = "read_only";
300
else if (Arg.hasAttribute(Attribute::WriteOnly))
301
ActAccQual = "write_only";
302
}
303
304
StringRef AccQual;
305
Node = Func->getMetadata("kernel_arg_access_qual");
306
if (Node && ArgNo < Node->getNumOperands())
307
AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
308
309
StringRef TypeQual;
310
Node = Func->getMetadata("kernel_arg_type_qual");
311
if (Node && ArgNo < Node->getNumOperands())
312
TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
313
314
const DataLayout &DL = Func->getDataLayout();
315
316
MaybeAlign PointeeAlign;
317
Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
318
319
// FIXME: Need to distinguish in memory alignment from pointer alignment.
320
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
321
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
322
PointeeAlign = Arg.getParamAlign().valueOrOne();
323
}
324
325
// There's no distinction between byval aggregates and raw aggregates.
326
Type *ArgTy;
327
Align ArgAlign;
328
std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
329
330
emitKernelArg(DL, ArgTy, ArgAlign,
331
getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
332
PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
333
AccQual, TypeQual);
334
}
335
336
void MetadataStreamerMsgPackV4::emitKernelArg(
337
const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
338
unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
339
StringRef Name, StringRef TypeName, StringRef BaseTypeName,
340
StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
341
auto Arg = Args.getDocument()->getMapNode();
342
343
if (!Name.empty())
344
Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
345
if (!TypeName.empty())
346
Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
347
auto Size = DL.getTypeAllocSize(Ty);
348
Arg[".size"] = Arg.getDocument()->getNode(Size);
349
Offset = alignTo(Offset, Alignment);
350
Arg[".offset"] = Arg.getDocument()->getNode(Offset);
351
Offset += Size;
352
Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
353
if (PointeeAlign)
354
Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
355
356
if (auto PtrTy = dyn_cast<PointerType>(Ty))
357
if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
358
// Limiting address space to emit only for a certain ValueKind.
359
if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
360
Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
361
/*Copy=*/true);
362
363
if (auto AQ = getAccessQualifier(AccQual))
364
Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
365
366
if (auto AAQ = getAccessQualifier(ActAccQual))
367
Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);
368
369
SmallVector<StringRef, 1> SplitTypeQuals;
370
TypeQual.split(SplitTypeQuals, " ", -1, false);
371
for (StringRef Key : SplitTypeQuals) {
372
if (Key == "const")
373
Arg[".is_const"] = Arg.getDocument()->getNode(true);
374
else if (Key == "restrict")
375
Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
376
else if (Key == "volatile")
377
Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
378
else if (Key == "pipe")
379
Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
380
}
381
382
Args.push_back(Arg);
383
}
384
385
void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
386
const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
387
auto &Func = MF.getFunction();
388
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
389
390
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
391
if (!HiddenArgNumBytes)
392
return;
393
394
const Module *M = Func.getParent();
395
auto &DL = M->getDataLayout();
396
auto Int64Ty = Type::getInt64Ty(Func.getContext());
397
398
Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
399
400
if (HiddenArgNumBytes >= 8)
401
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
402
Args);
403
if (HiddenArgNumBytes >= 16)
404
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
405
Args);
406
if (HiddenArgNumBytes >= 24)
407
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
408
Args);
409
410
auto Int8PtrTy =
411
PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
412
413
if (HiddenArgNumBytes >= 32) {
414
// We forbid the use of features requiring hostcall when compiling OpenCL
415
// before code object V5, which makes the mutual exclusion between the
416
// "printf buffer" and "hostcall buffer" here sound.
417
if (M->getNamedMetadata("llvm.printf.fmts"))
418
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
419
Args);
420
else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
421
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
422
Args);
423
else
424
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
425
}
426
427
// Emit "default queue" and "completion action" arguments if enqueue kernel is
428
// used, otherwise emit dummy "none" arguments.
429
if (HiddenArgNumBytes >= 40) {
430
if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
431
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
432
Args);
433
} else {
434
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
435
}
436
}
437
438
if (HiddenArgNumBytes >= 48) {
439
if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
440
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
441
Args);
442
} else {
443
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
444
}
445
}
446
447
// Emit the pointer argument for multi-grid object.
448
if (HiddenArgNumBytes >= 56) {
449
if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
450
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
451
Args);
452
} else {
453
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
454
}
455
}
456
}
457
458
msgpack::MapDocNode
459
MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
460
const SIProgramInfo &ProgramInfo,
461
unsigned CodeObjectVersion) const {
462
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
463
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
464
const Function &F = MF.getFunction();
465
466
auto Kern = HSAMetadataDoc->getMapNode();
467
468
Align MaxKernArgAlign;
469
Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
470
STM.getKernArgSegmentSize(F, MaxKernArgAlign));
471
Kern[".group_segment_fixed_size"] =
472
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
473
DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"],
474
msgpack::Type::UInt, ProgramInfo.ScratchSize);
475
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
476
DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"],
477
msgpack::Type::Boolean,
478
ProgramInfo.DynamicCallStack);
479
}
480
481
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
482
Kern[".workgroup_processor_mode"] =
483
Kern.getDocument()->getNode(ProgramInfo.WgpMode);
484
485
// FIXME: The metadata treats the minimum as 16?
486
Kern[".kernarg_segment_align"] =
487
Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
488
Kern[".wavefront_size"] =
489
Kern.getDocument()->getNode(STM.getWavefrontSize());
490
DelayedExprs->assignDocNode(Kern[".sgpr_count"], msgpack::Type::UInt,
491
ProgramInfo.NumSGPR);
492
DelayedExprs->assignDocNode(Kern[".vgpr_count"], msgpack::Type::UInt,
493
ProgramInfo.NumVGPR);
494
495
// Only add AGPR count to metadata for supported devices
496
if (STM.hasMAIInsts()) {
497
DelayedExprs->assignDocNode(Kern[".agpr_count"], msgpack::Type::UInt,
498
ProgramInfo.NumAccVGPR);
499
}
500
501
Kern[".max_flat_workgroup_size"] =
502
Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
503
unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
504
unsigned NumWGY = MFI.getMaxNumWorkGroupsY();
505
unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ();
506
if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) {
507
Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
508
Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
509
Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
510
}
511
Kern[".sgpr_spill_count"] =
512
Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
513
Kern[".vgpr_spill_count"] =
514
Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
515
516
return Kern;
517
}
518
519
bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
520
DelayedExprs->resolveDelayedExpressions();
521
return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
522
}
523
524
void MetadataStreamerMsgPackV4::begin(const Module &Mod,
525
const IsaInfo::AMDGPUTargetID &TargetID) {
526
emitVersion();
527
emitTargetID(TargetID);
528
emitPrintf(Mod);
529
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
530
DelayedExprs->clear();
531
}
532
533
void MetadataStreamerMsgPackV4::end() {
534
DelayedExprs->resolveDelayedExpressions();
535
std::string HSAMetadataString;
536
raw_string_ostream StrOS(HSAMetadataString);
537
HSAMetadataDoc->toYAML(StrOS);
538
539
if (DumpHSAMetadata)
540
dump(StrOS.str());
541
if (VerifyHSAMetadata)
542
verify(StrOS.str());
543
}
544
545
void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
546
const SIProgramInfo &ProgramInfo) {
547
auto &Func = MF.getFunction();
548
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
549
Func.getCallingConv() != CallingConv::SPIR_KERNEL)
550
return;
551
552
auto CodeObjectVersion =
553
AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());
554
auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
555
556
auto Kernels =
557
getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
558
559
{
560
Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
561
Kern[".symbol"] = Kern.getDocument()->getNode(
562
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
563
emitKernelLanguage(Func, Kern);
564
emitKernelAttrs(Func, Kern);
565
emitKernelArgs(MF, Kern);
566
}
567
568
Kernels.push_back(Kern);
569
}
570
571
//===----------------------------------------------------------------------===//
572
// HSAMetadataStreamerV5
573
//===----------------------------------------------------------------------===//
574
575
void MetadataStreamerMsgPackV5::emitVersion() {
576
auto Version = HSAMetadataDoc->getArrayNode();
577
Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
578
Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
579
getRootMetadata("amdhsa.version") = Version;
580
}
581
582
void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
583
const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
584
auto &Func = MF.getFunction();
585
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
586
587
// No implicit kernel argument is used.
588
if (ST.getImplicitArgNumBytes(Func) == 0)
589
return;
590
591
const Module *M = Func.getParent();
592
auto &DL = M->getDataLayout();
593
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
594
595
auto Int64Ty = Type::getInt64Ty(Func.getContext());
596
auto Int32Ty = Type::getInt32Ty(Func.getContext());
597
auto Int16Ty = Type::getInt16Ty(Func.getContext());
598
599
Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
600
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
601
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
602
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
603
604
emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
605
emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
606
emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
607
608
emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
609
emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
610
emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
611
612
// Reserved for hidden_tool_correlation_id.
613
Offset += 8;
614
615
Offset += 8; // Reserved.
616
617
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
618
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
619
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
620
621
emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
622
623
Offset += 6; // Reserved.
624
auto Int8PtrTy =
625
PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
626
627
if (M->getNamedMetadata("llvm.printf.fmts")) {
628
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
629
Args);
630
} else {
631
Offset += 8; // Skipped.
632
}
633
634
if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
635
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
636
Args);
637
} else {
638
Offset += 8; // Skipped.
639
}
640
641
if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
642
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
643
Args);
644
} else {
645
Offset += 8; // Skipped.
646
}
647
648
if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
649
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
650
else
651
Offset += 8; // Skipped.
652
653
if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
654
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
655
Args);
656
} else {
657
Offset += 8; // Skipped.
658
}
659
660
if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
661
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
662
Args);
663
} else {
664
Offset += 8; // Skipped.
665
}
666
667
// Emit argument for hidden dynamic lds size
668
if (MFI.isDynamicLDSUsed()) {
669
emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
670
Args);
671
} else {
672
Offset += 4; // skipped
673
}
674
675
Offset += 68; // Reserved.
676
677
// hidden_private_base and hidden_shared_base are only when the subtarget has
678
// ApertureRegs.
679
if (!ST.hasApertureRegs()) {
680
emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
681
emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
682
} else {
683
Offset += 8; // Skipped.
684
}
685
686
if (MFI.getUserSGPRInfo().hasQueuePtr())
687
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
688
}
689
690
void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
691
msgpack::MapDocNode Kern) {
692
MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
693
694
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
695
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
696
}
697
698
//===----------------------------------------------------------------------===//
699
// HSAMetadataStreamerV6
700
//===----------------------------------------------------------------------===//
701
702
void MetadataStreamerMsgPackV6::emitVersion() {
703
auto Version = HSAMetadataDoc->getArrayNode();
704
Version.push_back(Version.getDocument()->getNode(VersionMajorV6));
705
Version.push_back(Version.getDocument()->getNode(VersionMinorV6));
706
getRootMetadata("amdhsa.version") = Version;
707
}
708
709
} // end namespace AMDGPU::HSAMD
710
} // end namespace llvm
711
712