// Path: blob/main/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.h
// 35266 views
//===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file declares AMDGPU TargetInfo objects.9//10//===----------------------------------------------------------------------===//1112#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H13#define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H1415#include "clang/Basic/TargetID.h"16#include "clang/Basic/TargetInfo.h"17#include "clang/Basic/TargetOptions.h"18#include "llvm/ADT/StringSet.h"19#include "llvm/Support/AMDGPUAddrSpace.h"20#include "llvm/Support/Compiler.h"21#include "llvm/TargetParser/TargetParser.h"22#include "llvm/TargetParser/Triple.h"23#include <optional>2425namespace clang {26namespace targets {2728class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {2930static const char *const GCCRegNames[];3132static const LangASMap AMDGPUDefIsGenMap;33static const LangASMap AMDGPUDefIsPrivMap;3435llvm::AMDGPU::GPUKind GPUKind;36unsigned GPUFeatures;37unsigned WavefrontSize;3839/// Whether to use cumode or WGP mode. True for cumode. False for WGP mode.40bool CUMode;4142/// Whether having image instructions.43bool HasImage = false;4445/// Target ID is device name followed by optional feature name postfixed46/// by plus or minus sign delimitted by colon, e.g. 
gfx908:xnack+:sramecc-.47/// If the target ID contains feature+, map it to true.48/// If the target ID contains feature-, map it to false.49/// If the target ID does not contain a feature (default), do not map it.50llvm::StringMap<bool> OffloadArchFeatures;51std::string TargetID;5253bool hasFP64() const {54return getTriple().getArch() == llvm::Triple::amdgcn ||55!!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64);56}5758/// Has fast fma f3259bool hasFastFMAF() const {60return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32);61}6263/// Has fast fma f6464bool hasFastFMA() const {65return getTriple().getArch() == llvm::Triple::amdgcn;66}6768bool hasFMAF() const {69return getTriple().getArch() == llvm::Triple::amdgcn ||70!!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA);71}7273bool hasFullRateDenormalsF32() const {74return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);75}7677bool hasLDEXPF() const {78return getTriple().getArch() == llvm::Triple::amdgcn ||79!!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP);80}8182static bool isAMDGCN(const llvm::Triple &TT) {83return TT.getArch() == llvm::Triple::amdgcn;84}8586static bool isR600(const llvm::Triple &TT) {87return TT.getArch() == llvm::Triple::r600;88}8990public:91AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);9293void setAddressSpaceMap(bool DefaultIsPrivate);9495void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override;9697uint64_t getPointerWidthV(LangAS AS) const override {98if (isR600(getTriple()))99return 32;100unsigned TargetAS = getTargetAddressSpace(AS);101102if (TargetAS == llvm::AMDGPUAS::PRIVATE_ADDRESS ||103TargetAS == llvm::AMDGPUAS::LOCAL_ADDRESS)104return 32;105106return 64;107}108109uint64_t getPointerAlignV(LangAS AddrSpace) const override {110return getPointerWidthV(AddrSpace);111}112113uint64_t getMaxPointerWidth() const override {114return getTriple().getArch() == llvm::Triple::amdgcn ? 
64 : 32;115}116117bool hasBFloat16Type() const override { return isAMDGCN(getTriple()); }118119std::string_view getClobbers() const override { return ""; }120121ArrayRef<const char *> getGCCRegNames() const override;122123ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {124return std::nullopt;125}126127/// Accepted register names: (n, m is unsigned integer, n < m)128/// v129/// s130/// a131/// {vn}, {v[n]}132/// {sn}, {s[n]}133/// {an}, {a[n]}134/// {S} , where S is a special register name135////{v[n:m]}136/// {s[n:m]}137/// {a[n:m]}138bool validateAsmConstraint(const char *&Name,139TargetInfo::ConstraintInfo &Info) const override {140static const ::llvm::StringSet<> SpecialRegs({141"exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma",142"flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo",143"exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi",144});145146switch (*Name) {147case 'I':148Info.setRequiresImmediate(-16, 64);149return true;150case 'J':151Info.setRequiresImmediate(-32768, 32767);152return true;153case 'A':154case 'B':155case 'C':156Info.setRequiresImmediate();157return true;158default:159break;160}161162StringRef S(Name);163164if (S == "DA" || S == "DB") {165Name++;166Info.setRequiresImmediate();167return true;168}169170bool HasLeftParen = S.consume_front("{");171if (S.empty())172return false;173if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') {174if (!HasLeftParen)175return false;176auto E = S.find('}');177if (!SpecialRegs.count(S.substr(0, E)))178return false;179S = S.drop_front(E + 1);180if (!S.empty())181return false;182// Found {S} where S is a special register.183Info.setAllowsRegister();184Name = S.data() - 1;185return true;186}187S = S.drop_front();188if (!HasLeftParen) {189if (!S.empty())190return false;191// Found s, v or a.192Info.setAllowsRegister();193Name = S.data() - 1;194return true;195}196bool HasLeftBracket = S.consume_front("[");197unsigned long long N;198if (S.empty() || 
consumeUnsignedInteger(S, 10, N))199return false;200if (S.consume_front(":")) {201if (!HasLeftBracket)202return false;203unsigned long long M;204if (consumeUnsignedInteger(S, 10, M) || N >= M)205return false;206}207if (HasLeftBracket) {208if (!S.consume_front("]"))209return false;210}211if (!S.consume_front("}"))212return false;213if (!S.empty())214return false;215// Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]}216// or {a[n:m]}.217Info.setAllowsRegister();218Name = S.data() - 1;219return true;220}221222// \p Constraint will be left pointing at the last character of223// the constraint. In practice, it won't be changed unless the224// constraint is longer than one character.225std::string convertConstraint(const char *&Constraint) const override {226227StringRef S(Constraint);228if (S == "DA" || S == "DB") {229return std::string("^") + std::string(Constraint++, 2);230}231232const char *Begin = Constraint;233TargetInfo::ConstraintInfo Info("", "");234if (validateAsmConstraint(Constraint, Info))235return std::string(Begin).substr(0, Constraint - Begin + 1);236237Constraint = Begin;238return std::string(1, *Constraint);239}240241bool242initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,243StringRef CPU,244const std::vector<std::string> &FeatureVec) const override;245246ArrayRef<Builtin::Info> getTargetBuiltins() const override;247248bool useFP16ConversionIntrinsics() const override { return false; }249250void getTargetDefines(const LangOptions &Opts,251MacroBuilder &Builder) const override;252253BuiltinVaListKind getBuiltinVaListKind() const override {254return TargetInfo::CharPtrBuiltinVaList;255}256257bool isValidCPUName(StringRef Name) const override {258if (getTriple().getArch() == llvm::Triple::amdgcn)259return llvm::AMDGPU::parseArchAMDGCN(Name) != llvm::AMDGPU::GK_NONE;260return llvm::AMDGPU::parseArchR600(Name) != llvm::AMDGPU::GK_NONE;261}262263void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const 
override;264265bool setCPU(const std::string &Name) override {266if (getTriple().getArch() == llvm::Triple::amdgcn) {267GPUKind = llvm::AMDGPU::parseArchAMDGCN(Name);268GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(GPUKind);269} else {270GPUKind = llvm::AMDGPU::parseArchR600(Name);271GPUFeatures = llvm::AMDGPU::getArchAttrR600(GPUKind);272}273274return GPUKind != llvm::AMDGPU::GK_NONE;275}276277void setSupportedOpenCLOpts() override {278auto &Opts = getSupportedOpenCLOpts();279Opts["cl_clang_storage_class_specifiers"] = true;280Opts["__cl_clang_variadic_functions"] = true;281Opts["__cl_clang_function_pointers"] = true;282Opts["__cl_clang_non_portable_kernel_param_types"] = true;283Opts["__cl_clang_bitfields"] = true;284285bool IsAMDGCN = isAMDGCN(getTriple());286287Opts["cl_khr_fp64"] = hasFP64();288Opts["__opencl_c_fp64"] = hasFP64();289290if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) {291Opts["cl_khr_byte_addressable_store"] = true;292Opts["cl_khr_global_int32_base_atomics"] = true;293Opts["cl_khr_global_int32_extended_atomics"] = true;294Opts["cl_khr_local_int32_base_atomics"] = true;295Opts["cl_khr_local_int32_extended_atomics"] = true;296}297298if (IsAMDGCN) {299Opts["cl_khr_fp16"] = true;300Opts["cl_khr_int64_base_atomics"] = true;301Opts["cl_khr_int64_extended_atomics"] = true;302Opts["cl_khr_mipmap_image"] = true;303Opts["cl_khr_mipmap_image_writes"] = true;304Opts["cl_khr_subgroups"] = true;305Opts["cl_amd_media_ops"] = true;306Opts["cl_amd_media_ops2"] = true;307308Opts["__opencl_c_images"] = true;309Opts["__opencl_c_3d_image_writes"] = true;310Opts["cl_khr_3d_image_writes"] = true;311}312}313314LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override {315switch (TK) {316case OCLTK_Image:317return LangAS::opencl_constant;318319case OCLTK_ClkEvent:320case OCLTK_Queue:321case OCLTK_ReserveID:322return LangAS::opencl_global;323324default:325return TargetInfo::getOpenCLTypeAddrSpace(TK);326}327}328329LangAS getOpenCLBuiltinAddressSpace(unsigned 
AS) const override {330switch (AS) {331case 0:332return LangAS::opencl_generic;333case 1:334return LangAS::opencl_global;335case 3:336return LangAS::opencl_local;337case 4:338return LangAS::opencl_constant;339case 5:340return LangAS::opencl_private;341default:342return getLangASFromTargetAS(AS);343}344}345346LangAS getCUDABuiltinAddressSpace(unsigned AS) const override {347switch (AS) {348case 0:349return LangAS::Default;350case 1:351return LangAS::cuda_device;352case 3:353return LangAS::cuda_shared;354case 4:355return LangAS::cuda_constant;356default:357return getLangASFromTargetAS(AS);358}359}360361std::optional<LangAS> getConstantAddressSpace() const override {362return getLangASFromTargetAS(llvm::AMDGPUAS::CONSTANT_ADDRESS);363}364365const llvm::omp::GV &getGridValue() const override {366switch (WavefrontSize) {367case 32:368return llvm::omp::getAMDGPUGridValues<32>();369case 64:370return llvm::omp::getAMDGPUGridValues<64>();371default:372llvm_unreachable("getGridValue not implemented for this wavesize");373}374}375376/// \returns Target specific vtbl ptr address space.377unsigned getVtblPtrAddressSpace() const override {378return static_cast<unsigned>(llvm::AMDGPUAS::CONSTANT_ADDRESS);379}380381/// \returns If a target requires an address within a target specific address382/// space \p AddressSpace to be converted in order to be used, then return the383/// corresponding target specific DWARF address space.384///385/// \returns Otherwise return std::nullopt and no conversion will be emitted386/// in the DWARF.387std::optional<unsigned>388getDWARFAddressSpace(unsigned AddressSpace) const override {389const unsigned DWARF_Private = 1;390const unsigned DWARF_Local = 2;391if (AddressSpace == llvm::AMDGPUAS::PRIVATE_ADDRESS) {392return DWARF_Private;393} else if (AddressSpace == llvm::AMDGPUAS::LOCAL_ADDRESS) {394return DWARF_Local;395} else {396return std::nullopt;397}398}399400CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {401switch 
(CC) {402default:403return CCCR_Warning;404case CC_C:405case CC_OpenCLKernel:406case CC_AMDGPUKernelCall:407return CCCR_OK;408}409}410411// In amdgcn target the null pointer in global, constant, and generic412// address space has value 0 but in private and local address space has413// value ~0.414uint64_t getNullPointerValue(LangAS AS) const override {415// FIXME: Also should handle region.416return (AS == LangAS::opencl_local || AS == LangAS::opencl_private ||417AS == LangAS::sycl_local || AS == LangAS::sycl_private)418? ~0419: 0;420}421422void setAuxTarget(const TargetInfo *Aux) override;423424bool hasBitIntType() const override { return true; }425426// Record offload arch features since they are needed for defining the427// pre-defined macros.428bool handleTargetFeatures(std::vector<std::string> &Features,429DiagnosticsEngine &Diags) override {430auto TargetIDFeatures =431getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind));432for (const auto &F : Features) {433assert(F.front() == '+' || F.front() == '-');434if (F == "+wavefrontsize64")435WavefrontSize = 64;436else if (F == "+cumode")437CUMode = true;438else if (F == "-cumode")439CUMode = false;440else if (F == "+image-insts")441HasImage = true;442bool IsOn = F.front() == '+';443StringRef Name = StringRef(F).drop_front();444if (!llvm::is_contained(TargetIDFeatures, Name))445continue;446assert(!OffloadArchFeatures.contains(Name));447OffloadArchFeatures[Name] = IsOn;448}449return true;450}451452std::optional<std::string> getTargetID() const override {453if (!isAMDGCN(getTriple()))454return std::nullopt;455// When -target-cpu is not set, we assume generic code that it is valid456// for all GPU and use an empty string as target ID to represent that.457if (GPUKind == llvm::AMDGPU::GK_NONE)458return std::string("");459return getCanonicalTargetID(getArchNameAMDGCN(GPUKind),460OffloadArchFeatures);461}462463bool hasHIPImageSupport() const override { return HasImage; }464};465466} // namespace 
targets467} // namespace clang468469#endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H470471472