Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
35294 views
//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file9/// AMDGPU HSA Metadata Streamer.10///11//12//===----------------------------------------------------------------------===//1314#include "AMDGPUHSAMetadataStreamer.h"15#include "AMDGPU.h"16#include "GCNSubtarget.h"17#include "MCTargetDesc/AMDGPUTargetStreamer.h"18#include "SIMachineFunctionInfo.h"19#include "SIProgramInfo.h"20#include "llvm/IR/Module.h"21#include "llvm/MC/MCContext.h"22#include "llvm/MC/MCExpr.h"23using namespace llvm;2425static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,26const DataLayout &DL) {27Type *Ty = Arg.getType();28MaybeAlign ArgAlign;29if (Arg.hasByRefAttr()) {30Ty = Arg.getParamByRefType();31ArgAlign = Arg.getParamAlign();32}3334if (!ArgAlign)35ArgAlign = DL.getABITypeAlign(Ty);3637return std::pair(Ty, *ArgAlign);38}3940namespace llvm {4142static cl::opt<bool> DumpHSAMetadata(43"amdgpu-dump-hsa-metadata",44cl::desc("Dump AMDGPU HSA Metadata"));45static cl::opt<bool> VerifyHSAMetadata(46"amdgpu-verify-hsa-metadata",47cl::desc("Verify AMDGPU HSA Metadata"));4849namespace AMDGPU::HSAMD {5051//===----------------------------------------------------------------------===//52// HSAMetadataStreamerV453//===----------------------------------------------------------------------===//5455void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {56errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';57}5859void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {60errs() << "AMDGPU HSA Metadata Parser Test: ";6162msgpack::Document FromHSAMetadataString;6364if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {65errs() << "FAIL\n";66return;67}6869std::string ToHSAMetadataString;70raw_string_ostream StrOS(ToHSAMetadataString);71FromHSAMetadataString.toYAML(StrOS);7273errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';74if (HSAMetadataString != ToHSAMetadataString) {75errs() << "Original input: " << HSAMetadataString << '\n'76<< "Produced output: " << StrOS.str() << '\n';77}78}7980std::optional<StringRef>81MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {82return StringSwitch<std::optional<StringRef>>(AccQual)83.Case("read_only", StringRef("read_only"))84.Case("write_only", StringRef("write_only"))85.Case("read_write", StringRef("read_write"))86.Default(std::nullopt);87}8889std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(90unsigned AddressSpace) const {91switch (AddressSpace) {92case AMDGPUAS::PRIVATE_ADDRESS:93return StringRef("private");94case AMDGPUAS::GLOBAL_ADDRESS:95return StringRef("global");96case AMDGPUAS::CONSTANT_ADDRESS:97return StringRef("constant");98case AMDGPUAS::LOCAL_ADDRESS:99return StringRef("local");100case AMDGPUAS::FLAT_ADDRESS:101return StringRef("generic");102case AMDGPUAS::REGION_ADDRESS:103return StringRef("region");104default:105return std::nullopt;106}107}108109StringRef110MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,111StringRef BaseTypeName) const {112if (TypeQual.contains("pipe"))113return "pipe";114115return StringSwitch<StringRef>(BaseTypeName)116.Case("image1d_t", "image")117.Case("image1d_array_t", "image")118.Case("image1d_buffer_t", "image")119.Case("image2d_t", "image")120.Case("image2d_array_t", "image")121.Case("image2d_array_depth_t", "image")122.Case("image2d_array_msaa_t", "image")123.Case("image2d_array_msaa_depth_t", "image")124.Case("image2d_depth_t", "image")125.Case("image2d_msaa_t", "image")126.Case("image2d_msaa_depth_t", "image")127.Case("image3d_t", "image")128.Case("sampler_t", "sampler")129.Case("queue_t", "queue")130.Default(isa<PointerType>(Ty)131? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS132? "dynamic_shared_pointer"133: "global_buffer")134: "by_value");135}136137std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,138bool Signed) const {139switch (Ty->getTypeID()) {140case Type::IntegerTyID: {141if (!Signed)142return (Twine('u') + getTypeName(Ty, true)).str();143144auto BitWidth = Ty->getIntegerBitWidth();145switch (BitWidth) {146case 8:147return "char";148case 16:149return "short";150case 32:151return "int";152case 64:153return "long";154default:155return (Twine('i') + Twine(BitWidth)).str();156}157}158case Type::HalfTyID:159return "half";160case Type::FloatTyID:161return "float";162case Type::DoubleTyID:163return "double";164case Type::FixedVectorTyID: {165auto VecTy = cast<FixedVectorType>(Ty);166auto ElTy = VecTy->getElementType();167auto NumElements = VecTy->getNumElements();168return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();169}170default:171return "unknown";172}173}174175msgpack::ArrayDocNode176MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {177auto Dims = HSAMetadataDoc->getArrayNode();178if (Node->getNumOperands() != 3)179return Dims;180181for (auto &Op : Node->operands())182Dims.push_back(Dims.getDocument()->getNode(183uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));184return Dims;185}186187void MetadataStreamerMsgPackV4::emitVersion() {188auto Version = HSAMetadataDoc->getArrayNode();189Version.push_back(Version.getDocument()->getNode(VersionMajorV4));190Version.push_back(Version.getDocument()->getNode(VersionMinorV4));191getRootMetadata("amdhsa.version") = Version;192}193194void MetadataStreamerMsgPackV4::emitTargetID(195const IsaInfo::AMDGPUTargetID &TargetID) {196getRootMetadata("amdhsa.target") =197HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);198}199200void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {201auto Node = Mod.getNamedMetadata("llvm.printf.fmts");202if (!Node)203return;204205auto Printf = HSAMetadataDoc->getArrayNode();206for (auto *Op : Node->operands())207if (Op->getNumOperands())208Printf.push_back(Printf.getDocument()->getNode(209cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));210getRootMetadata("amdhsa.printf") = Printf;211}212213void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,214msgpack::MapDocNode Kern) {215// TODO: What about other languages?216auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");217if (!Node || !Node->getNumOperands())218return;219auto Op0 = Node->getOperand(0);220if (Op0->getNumOperands() <= 1)221return;222223Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");224auto LanguageVersion = Kern.getDocument()->getArrayNode();225LanguageVersion.push_back(Kern.getDocument()->getNode(226mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));227LanguageVersion.push_back(Kern.getDocument()->getNode(228mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));229Kern[".language_version"] = LanguageVersion;230}231232void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,233msgpack::MapDocNode Kern) {234235if (auto Node = Func.getMetadata("reqd_work_group_size"))236Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);237if (auto Node = Func.getMetadata("work_group_size_hint"))238Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);239if (auto Node = Func.getMetadata("vec_type_hint")) {240Kern[".vec_type_hint"] = Kern.getDocument()->getNode(241getTypeName(242cast<ValueAsMetadata>(Node->getOperand(0))->getType(),243mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),244/*Copy=*/true);245}246if (Func.hasFnAttribute("runtime-handle")) {247Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(248Func.getFnAttribute("runtime-handle").getValueAsString().str(),249/*Copy=*/true);250}251if (Func.hasFnAttribute("device-init"))252Kern[".kind"] = Kern.getDocument()->getNode("init");253else if (Func.hasFnAttribute("device-fini"))254Kern[".kind"] = Kern.getDocument()->getNode("fini");255}256257void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,258msgpack::MapDocNode Kern) {259auto &Func = MF.getFunction();260unsigned Offset = 0;261auto Args = HSAMetadataDoc->getArrayNode();262for (auto &Arg : Func.args())263emitKernelArg(Arg, Offset, Args);264265emitHiddenKernelArgs(MF, Offset, Args);266267Kern[".args"] = Args;268}269270void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,271unsigned &Offset,272msgpack::ArrayDocNode Args) {273auto Func = Arg.getParent();274auto ArgNo = Arg.getArgNo();275const MDNode *Node;276277StringRef Name;278Node = Func->getMetadata("kernel_arg_name");279if (Node && ArgNo < Node->getNumOperands())280Name = cast<MDString>(Node->getOperand(ArgNo))->getString();281else if (Arg.hasName())282Name = Arg.getName();283284StringRef TypeName;285Node = Func->getMetadata("kernel_arg_type");286if (Node && ArgNo < Node->getNumOperands())287TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();288289StringRef BaseTypeName;290Node = Func->getMetadata("kernel_arg_base_type");291if (Node && ArgNo < Node->getNumOperands())292BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();293294StringRef ActAccQual;295// Do we really need NoAlias check here?296if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {297if (Arg.onlyReadsMemory())298ActAccQual = "read_only";299else if (Arg.hasAttribute(Attribute::WriteOnly))300ActAccQual = "write_only";301}302303StringRef AccQual;304Node = Func->getMetadata("kernel_arg_access_qual");305if (Node && ArgNo < Node->getNumOperands())306AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();307308StringRef TypeQual;309Node = Func->getMetadata("kernel_arg_type_qual");310if (Node && ArgNo < Node->getNumOperands())311TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();312313const DataLayout &DL = Func->getDataLayout();314315MaybeAlign PointeeAlign;316Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();317318// FIXME: Need to distinguish in memory alignment from pointer alignment.319if (auto PtrTy = dyn_cast<PointerType>(Ty)) {320if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)321PointeeAlign = Arg.getParamAlign().valueOrOne();322}323324// There's no distinction between byval aggregates and raw aggregates.325Type *ArgTy;326Align ArgAlign;327std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);328329emitKernelArg(DL, ArgTy, ArgAlign,330getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,331PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,332AccQual, TypeQual);333}334335void MetadataStreamerMsgPackV4::emitKernelArg(336const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,337unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,338StringRef Name, StringRef TypeName, StringRef BaseTypeName,339StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {340auto Arg = Args.getDocument()->getMapNode();341342if (!Name.empty())343Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);344if (!TypeName.empty())345Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);346auto Size = DL.getTypeAllocSize(Ty);347Arg[".size"] = Arg.getDocument()->getNode(Size);348Offset = alignTo(Offset, Alignment);349Arg[".offset"] = Arg.getDocument()->getNode(Offset);350Offset += Size;351Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);352if (PointeeAlign)353Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());354355if (auto PtrTy = dyn_cast<PointerType>(Ty))356if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))357// Limiting address space to emit only for a certain ValueKind.358if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")359Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,360/*Copy=*/true);361362if (auto AQ = getAccessQualifier(AccQual))363Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);364365if (auto AAQ = getAccessQualifier(ActAccQual))366Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);367368SmallVector<StringRef, 1> SplitTypeQuals;369TypeQual.split(SplitTypeQuals, " ", -1, false);370for (StringRef Key : SplitTypeQuals) {371if (Key == "const")372Arg[".is_const"] = Arg.getDocument()->getNode(true);373else if (Key == "restrict")374Arg[".is_restrict"] = Arg.getDocument()->getNode(true);375else if (Key == "volatile")376Arg[".is_volatile"] = Arg.getDocument()->getNode(true);377else if (Key == "pipe")378Arg[".is_pipe"] = Arg.getDocument()->getNode(true);379}380381Args.push_back(Arg);382}383384void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(385const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {386auto &Func = MF.getFunction();387const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();388389unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);390if (!HiddenArgNumBytes)391return;392393const Module *M = Func.getParent();394auto &DL = M->getDataLayout();395auto Int64Ty = Type::getInt64Ty(Func.getContext());396397Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());398399if (HiddenArgNumBytes >= 8)400emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,401Args);402if (HiddenArgNumBytes >= 16)403emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,404Args);405if (HiddenArgNumBytes >= 24)406emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,407Args);408409auto Int8PtrTy =410PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);411412if (HiddenArgNumBytes >= 32) {413// We forbid the use of features requiring hostcall when compiling OpenCL414// before code object V5, which makes the mutual exclusion between the415// "printf buffer" and "hostcall buffer" here sound.416if (M->getNamedMetadata("llvm.printf.fmts"))417emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,418Args);419else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))420emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,421Args);422else423emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);424}425426// Emit "default queue" and "completion action" arguments if enqueue kernel is427// used, otherwise emit dummy "none" arguments.428if (HiddenArgNumBytes >= 40) {429if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {430emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,431Args);432} else {433emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);434}435}436437if (HiddenArgNumBytes >= 48) {438if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {439emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,440Args);441} else {442emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);443}444}445446// Emit the pointer argument for multi-grid object.447if (HiddenArgNumBytes >= 56) {448if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {449emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,450Args);451} else {452emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);453}454}455}456457msgpack::MapDocNode458MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,459const SIProgramInfo &ProgramInfo,460unsigned CodeObjectVersion) const {461const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();462const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();463const Function &F = MF.getFunction();464465auto Kern = HSAMetadataDoc->getMapNode();466467Align MaxKernArgAlign;468Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(469STM.getKernArgSegmentSize(F, MaxKernArgAlign));470Kern[".group_segment_fixed_size"] =471Kern.getDocument()->getNode(ProgramInfo.LDSSize);472DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"],473msgpack::Type::UInt, ProgramInfo.ScratchSize);474if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {475DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"],476msgpack::Type::Boolean,477ProgramInfo.DynamicCallStack);478}479480if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())481Kern[".workgroup_processor_mode"] =482Kern.getDocument()->getNode(ProgramInfo.WgpMode);483484// FIXME: The metadata treats the minimum as 16?485Kern[".kernarg_segment_align"] =486Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());487Kern[".wavefront_size"] =488Kern.getDocument()->getNode(STM.getWavefrontSize());489DelayedExprs->assignDocNode(Kern[".sgpr_count"], msgpack::Type::UInt,490ProgramInfo.NumSGPR);491DelayedExprs->assignDocNode(Kern[".vgpr_count"], msgpack::Type::UInt,492ProgramInfo.NumVGPR);493494// Only add AGPR count to metadata for supported devices495if (STM.hasMAIInsts()) {496DelayedExprs->assignDocNode(Kern[".agpr_count"], msgpack::Type::UInt,497ProgramInfo.NumAccVGPR);498}499500Kern[".max_flat_workgroup_size"] =501Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());502unsigned NumWGX = MFI.getMaxNumWorkGroupsX();503unsigned NumWGY = MFI.getMaxNumWorkGroupsY();504unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ();505if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) {506Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);507Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);508Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);509}510Kern[".sgpr_spill_count"] =511Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());512Kern[".vgpr_spill_count"] =513Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());514515return Kern;516}517518bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {519DelayedExprs->resolveDelayedExpressions();520return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);521}522523void MetadataStreamerMsgPackV4::begin(const Module &Mod,524const IsaInfo::AMDGPUTargetID &TargetID) {525emitVersion();526emitTargetID(TargetID);527emitPrintf(Mod);528getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();529DelayedExprs->clear();530}531532void MetadataStreamerMsgPackV4::end() {533DelayedExprs->resolveDelayedExpressions();534std::string HSAMetadataString;535raw_string_ostream StrOS(HSAMetadataString);536HSAMetadataDoc->toYAML(StrOS);537538if (DumpHSAMetadata)539dump(StrOS.str());540if (VerifyHSAMetadata)541verify(StrOS.str());542}543544void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,545const SIProgramInfo &ProgramInfo) {546auto &Func = MF.getFunction();547if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&548Func.getCallingConv() != CallingConv::SPIR_KERNEL)549return;550551auto CodeObjectVersion =552AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());553auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);554555auto Kernels =556getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);557558{559Kern[".name"] = Kern.getDocument()->getNode(Func.getName());560Kern[".symbol"] = Kern.getDocument()->getNode(561(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);562emitKernelLanguage(Func, Kern);563emitKernelAttrs(Func, Kern);564emitKernelArgs(MF, Kern);565}566567Kernels.push_back(Kern);568}569570//===----------------------------------------------------------------------===//571// HSAMetadataStreamerV5572//===----------------------------------------------------------------------===//573574void MetadataStreamerMsgPackV5::emitVersion() {575auto Version = HSAMetadataDoc->getArrayNode();576Version.push_back(Version.getDocument()->getNode(VersionMajorV5));577Version.push_back(Version.getDocument()->getNode(VersionMinorV5));578getRootMetadata("amdhsa.version") = Version;579}580581void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(582const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {583auto &Func = MF.getFunction();584const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();585586// No implicit kernel argument is used.587if (ST.getImplicitArgNumBytes(Func) == 0)588return;589590const Module *M = Func.getParent();591auto &DL = M->getDataLayout();592const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();593594auto Int64Ty = Type::getInt64Ty(Func.getContext());595auto Int32Ty = Type::getInt32Ty(Func.getContext());596auto Int16Ty = Type::getInt16Ty(Func.getContext());597598Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());599emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);600emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);601emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);602603emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);604emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);605emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);606607emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);608emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);609emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);610611// Reserved for hidden_tool_correlation_id.612Offset += 8;613614Offset += 8; // Reserved.615616emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);617emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);618emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);619620emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);621622Offset += 6; // Reserved.623auto Int8PtrTy =624PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);625626if (M->getNamedMetadata("llvm.printf.fmts")) {627emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,628Args);629} else {630Offset += 8; // Skipped.631}632633if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {634emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,635Args);636} else {637Offset += 8; // Skipped.638}639640if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {641emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,642Args);643} else {644Offset += 8; // Skipped.645}646647if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))648emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);649else650Offset += 8; // Skipped.651652if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {653emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,654Args);655} else {656Offset += 8; // Skipped.657}658659if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {660emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,661Args);662} else {663Offset += 8; // Skipped.664}665666// Emit argument for hidden dynamic lds size667if (MFI.isDynamicLDSUsed()) {668emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,669Args);670} else {671Offset += 4; // skipped672}673674Offset += 68; // Reserved.675676// hidden_private_base and hidden_shared_base are only when the subtarget has677// ApertureRegs.678if (!ST.hasApertureRegs()) {679emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);680emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);681} else {682Offset += 8; // Skipped.683}684685if (MFI.getUserSGPRInfo().hasQueuePtr())686emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);687}688689void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,690msgpack::MapDocNode Kern) {691MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);692693if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())694Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);695}696697//===----------------------------------------------------------------------===//698// HSAMetadataStreamerV6699//===----------------------------------------------------------------------===//700701void MetadataStreamerMsgPackV6::emitVersion() {702auto Version = HSAMetadataDoc->getArrayNode();703Version.push_back(Version.getDocument()->getNode(VersionMajorV6));704Version.push_back(Version.getDocument()->getNode(VersionMinorV6));705getRootMetadata("amdhsa.version") = Version;706}707708} // end namespace AMDGPU::HSAMD709} // end namespace llvm710711712