// Path: blob/main/contrib/llvm-project/clang/lib/Basic/Targets/NVPTX.cpp
// 35294 views
//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements NVPTX TargetInfo objects.9//10//===----------------------------------------------------------------------===//1112#include "NVPTX.h"13#include "Targets.h"14#include "clang/Basic/Builtins.h"15#include "clang/Basic/MacroBuilder.h"16#include "clang/Basic/TargetBuiltins.h"17#include "llvm/ADT/StringSwitch.h"1819using namespace clang;20using namespace clang::targets;2122static constexpr Builtin::Info BuiltinInfo[] = {23#define BUILTIN(ID, TYPE, ATTRS) \24{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},25#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \26{#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},27#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \28{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},29#include "clang/Basic/BuiltinsNVPTX.def"30};3132const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};3334NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,35const TargetOptions &Opts,36unsigned TargetPointerWidth)37: TargetInfo(Triple) {38assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&39"NVPTX only supports 32- and 64-bit modes.");4041PTXVersion = 32;42for (const StringRef Feature : Opts.FeaturesAsWritten) {43int PTXV;44if (!Feature.starts_with("+ptx") ||45Feature.drop_front(4).getAsInteger(10, PTXV))46continue;47PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?48}4950TLSSupported = false;51VLASupported = false;52AddrSpaceMap = &NVPTXAddrSpaceMap;53UseAddrSpaceMapMangling = true;54// __bf16 is always available as a load/store only type.55BFloat16Width = BFloat16Align = 16;56BFloat16Format = 
&llvm::APFloat::BFloat();5758// Define available target features59// These must be defined in sorted order!60NoAsmVariants = true;61GPU = OffloadArch::UNUSED;6263// PTX supports f16 as a fundamental type.64HasLegalHalfType = true;65HasFloat16 = true;6667if (TargetPointerWidth == 32)68resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");69else if (Opts.NVPTXUseShortPointers)70resetDataLayout(71"e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");72else73resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");7475// If possible, get a TargetInfo for our host triple, so we can match its76// types.77llvm::Triple HostTriple(Opts.HostTriple);78if (!HostTriple.isNVPTX())79HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);8081// If no host target, make some guesses about the data layout and return.82if (!HostTarget) {83LongWidth = LongAlign = TargetPointerWidth;84PointerWidth = PointerAlign = TargetPointerWidth;85switch (TargetPointerWidth) {86case 32:87SizeType = TargetInfo::UnsignedInt;88PtrDiffType = TargetInfo::SignedInt;89IntPtrType = TargetInfo::SignedInt;90break;91case 64:92SizeType = TargetInfo::UnsignedLong;93PtrDiffType = TargetInfo::SignedLong;94IntPtrType = TargetInfo::SignedLong;95break;96default:97llvm_unreachable("TargetPointerWidth must be 32 or 64");98}99100MaxAtomicInlineWidth = TargetPointerWidth;101return;102}103104// Copy properties from host target.105PointerWidth = HostTarget->getPointerWidth(LangAS::Default);106PointerAlign = HostTarget->getPointerAlign(LangAS::Default);107BoolWidth = HostTarget->getBoolWidth();108BoolAlign = HostTarget->getBoolAlign();109IntWidth = HostTarget->getIntWidth();110IntAlign = HostTarget->getIntAlign();111HalfWidth = HostTarget->getHalfWidth();112HalfAlign = HostTarget->getHalfAlign();113FloatWidth = HostTarget->getFloatWidth();114FloatAlign = HostTarget->getFloatAlign();115DoubleWidth = HostTarget->getDoubleWidth();116DoubleAlign = 
HostTarget->getDoubleAlign();117LongWidth = HostTarget->getLongWidth();118LongAlign = HostTarget->getLongAlign();119LongLongWidth = HostTarget->getLongLongWidth();120LongLongAlign = HostTarget->getLongLongAlign();121MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,122/* HasNonWeakDef = */ true);123NewAlign = HostTarget->getNewAlign();124DefaultAlignForAttributeAligned =125HostTarget->getDefaultAlignForAttributeAligned();126SizeType = HostTarget->getSizeType();127IntMaxType = HostTarget->getIntMaxType();128PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);129IntPtrType = HostTarget->getIntPtrType();130WCharType = HostTarget->getWCharType();131WIntType = HostTarget->getWIntType();132Char16Type = HostTarget->getChar16Type();133Char32Type = HostTarget->getChar32Type();134Int64Type = HostTarget->getInt64Type();135SigAtomicType = HostTarget->getSigAtomicType();136ProcessIDType = HostTarget->getProcessIDType();137138UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();139UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();140UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();141ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();142143// This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and144// we need those macros to be identical on host and device, because (among145// other things) they affect which standard library classes are defined, and146// we need all classes to be defined on both the host and device.147MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();148149// Properties intentionally not copied from host:150// - LargeArrayMinWidth, LargeArrayAlign: Not visible across the151// host/device boundary.152// - SuitableAlign: Not visible across the host/device boundary, and may153// correctly be different on host/device, e.g. 
if host has wider vector154// types than device.155// - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same156// as its double type, but that's not necessarily true on the host.157// TODO: nvcc emits a warning when using long double on device; we should158// do the same.159}160161ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {162return llvm::ArrayRef(GCCRegNames);163}164165bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {166return llvm::StringSwitch<bool>(Feature)167.Cases("ptx", "nvptx", true)168.Default(false);169}170171void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,172MacroBuilder &Builder) const {173Builder.defineMacro("__PTX__");174Builder.defineMacro("__NVPTX__");175176// Skip setting architecture dependent macros if undefined.177if (GPU == OffloadArch::UNUSED && !HostTarget)178return;179180if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {181// Set __CUDA_ARCH__ for the GPU specified.182std::string CUDAArchCode = [this] {183switch (GPU) {184case OffloadArch::GFX600:185case OffloadArch::GFX601:186case OffloadArch::GFX602:187case OffloadArch::GFX700:188case OffloadArch::GFX701:189case OffloadArch::GFX702:190case OffloadArch::GFX703:191case OffloadArch::GFX704:192case OffloadArch::GFX705:193case OffloadArch::GFX801:194case OffloadArch::GFX802:195case OffloadArch::GFX803:196case OffloadArch::GFX805:197case OffloadArch::GFX810:198case OffloadArch::GFX9_GENERIC:199case OffloadArch::GFX900:200case OffloadArch::GFX902:201case OffloadArch::GFX904:202case OffloadArch::GFX906:203case OffloadArch::GFX908:204case OffloadArch::GFX909:205case OffloadArch::GFX90a:206case OffloadArch::GFX90c:207case OffloadArch::GFX940:208case OffloadArch::GFX941:209case OffloadArch::GFX942:210case OffloadArch::GFX10_1_GENERIC:211case OffloadArch::GFX1010:212case OffloadArch::GFX1011:213case OffloadArch::GFX1012:214case OffloadArch::GFX1013:215case OffloadArch::GFX10_3_GENERIC:216case OffloadArch::GFX1030:217case 
OffloadArch::GFX1031:218case OffloadArch::GFX1032:219case OffloadArch::GFX1033:220case OffloadArch::GFX1034:221case OffloadArch::GFX1035:222case OffloadArch::GFX1036:223case OffloadArch::GFX11_GENERIC:224case OffloadArch::GFX1100:225case OffloadArch::GFX1101:226case OffloadArch::GFX1102:227case OffloadArch::GFX1103:228case OffloadArch::GFX1150:229case OffloadArch::GFX1151:230case OffloadArch::GFX1152:231case OffloadArch::GFX12_GENERIC:232case OffloadArch::GFX1200:233case OffloadArch::GFX1201:234case OffloadArch::AMDGCNSPIRV:235case OffloadArch::Generic:236case OffloadArch::LAST:237break;238case OffloadArch::UNKNOWN:239assert(false && "No GPU arch when compiling CUDA device code.");240return "";241case OffloadArch::UNUSED:242case OffloadArch::SM_20:243return "200";244case OffloadArch::SM_21:245return "210";246case OffloadArch::SM_30:247return "300";248case OffloadArch::SM_32_:249return "320";250case OffloadArch::SM_35:251return "350";252case OffloadArch::SM_37:253return "370";254case OffloadArch::SM_50:255return "500";256case OffloadArch::SM_52:257return "520";258case OffloadArch::SM_53:259return "530";260case OffloadArch::SM_60:261return "600";262case OffloadArch::SM_61:263return "610";264case OffloadArch::SM_62:265return "620";266case OffloadArch::SM_70:267return "700";268case OffloadArch::SM_72:269return "720";270case OffloadArch::SM_75:271return "750";272case OffloadArch::SM_80:273return "800";274case OffloadArch::SM_86:275return "860";276case OffloadArch::SM_87:277return "870";278case OffloadArch::SM_89:279return "890";280case OffloadArch::SM_90:281case OffloadArch::SM_90a:282return "900";283}284llvm_unreachable("unhandled OffloadArch");285}();286Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);287if (GPU == OffloadArch::SM_90a)288Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");289}290}291292ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {293return llvm::ArrayRef(BuiltinInfo,294clang::NVPTX::LastTSBuiltin - 
Builtin::FirstTSBuiltin);295}296297298