Path: blob/main/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp
//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaCUDA.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void SemaCUDA::PushForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceHostDeviceDepth++;
}

bool SemaCUDA::PopForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceHostDeviceDepth == 0)
    return false;
  ForceHostDeviceDepth--;
  return true;
}

ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}
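
// Illustrative note (not part of the upstream source): ActOnExecConfigExpr
// turns the execution-configuration part of a kernel launch into a call to
// the configure function named by getConfigureFuncName(). Roughly, for
// hypothetical user code such as
//
//   __global__ void kern(int *p);
//   void launch(int *p) { kern<<<dim3(16), dim3(256)>>>(p); }
//
// the <<<...>>> arguments become the ExecConfig call built here, and the
// kernel arguments are handled by the subsequent call to the kernel stub.
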
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CUDAFunctionTarget::InvalidTarget;

  if (HasGlobalAttr)
    return CUDAFunctionTarget::Global;

  if (HasHostAttr && HasDeviceAttr)
    return CUDAFunctionTarget::HostDevice;

  if (HasDeviceAttr)
    return CUDAFunctionTarget::Device;

  return CUDAFunctionTarget::Host;
}

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == SemaCUDA::CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CUDAFunctionTarget::Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CUDAFunctionTarget::Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyTarget - Determine the CUDA compilation target for this function.
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                            bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CUDAFunctionTarget::InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CUDAFunctionTarget::Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CUDAFunctionTarget::HostDevice;
    return CUDAFunctionTarget::Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CUDAFunctionTarget::Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CUDAFunctionTarget::HostDevice;
  }

  return CUDAFunctionTarget::Host;
}
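
// Illustrative sketch (hypothetical user code, not part of this file): how
// declaration attributes map to the CUDAFunctionTarget values identified above.
//
//   __global__ void kern();            // CUDAFunctionTarget::Global
//   __device__ int dev();              // CUDAFunctionTarget::Device
//   __host__ int hst();                // CUDAFunctionTarget::Host
//   __host__ __device__ int both();    // CUDAFunctionTarget::HostDevice
//   int plain();                       // no attribute: treated as Host
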
/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with an implicit constant attribute
  // are emitted on both sides. Such variables are promoted to the device side
  // only if they have static constant initializers on the device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  // - on both sides in host device functions
  // - on the device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

SemaCUDA::CUDAFunctionPreference
SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CUDAFunctionTarget::Global &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Global) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Device))
    return CFP_Native;

  // HipStdPar mode is special: deciding whether a device-side call to a host
  // function is valid is deferred to a subsequent pass and cannot be
  // unambiguously adjudicated in the AST, so we optimistically allow such
  // calls to pass here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device ||
       CallerTarget == CUDAFunctionTarget::HostDevice) &&
      CalleeTarget == CUDAFunctionTarget::Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice &&
         CalleeTarget == CUDAFunctionTarget::Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CUDAFunctionTarget::Host ||
          CalleeTarget == CUDAFunctionTarget::Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Device) ||
      (CallerTarget == CUDAFunctionTarget::Device &&
       CalleeTarget == CUDAFunctionTarget::Host) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}
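
// Illustrative sketch of rules (b)-(d) above (hypothetical user code, not part
// of this file):
//
//   __device__ int dev();
//   __host__ int hst();
//   __host__ __device__ int hd(bool OnDevice) {
//     return OnDevice ? dev()   // SameSide in device mode, WrongSide in host mode
//                     : hst();  // SameSide in host mode, WrongSide in device mode
//   }
//
// WrongSide calls are accepted by Sema and only rejected later if the caller
// is actually emitted for the offending side; see CheckCall below.
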
template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void SemaCUDA::EraseUnwantedMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
                                CUDAFunctionTarget Target2,
                                CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != CUDAFunctionTarget::Global);
  assert(Target2 != CUDAFunctionTarget::Global);

  if (Target1 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMemberKind CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
                                    /* ConstArg */ ConstRHS,
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
              << llvm::to_underlying(BaseMethodTarget);
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
                                    /* ConstArg */ ConstRHS && !F->isMutable(),
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
              << llvm::to_underlying(FieldMethodTarget);
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CUDAFunctionTarget::Device)
      NeedsH = false;
    else if (*InferredTarget == CUDAFunctionTarget::Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

  return false;
}
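
// Illustrative sketch (hypothetical user code, not part of this file): a
// target conflict for an implicit special member, as detected above.
//
//   struct A { __device__ A(); };
//   struct B { __host__ B(); };
//   struct C : A, B {};   // implicit C::C() would need to call a __device__
//                         // and a __host__ ctor; it is marked with
//                         // CUDAInvalidTargetAttr and diagnosed if used.
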
bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Union does not have base class and union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}
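
// Illustrative sketch (hypothetical user code, not part of this file): what
// counts as an "empty" constructor for the device-initializer checks below.
//
//   struct Empty    { __host__ __device__ Empty() {} };
//   struct NonEmpty { __host__ __device__ NonEmpty() { v = 1; }  int v; };
//
//   __device__ Empty    e;   // OK: empty constructor
//   __device__ NonEmpty n;   // dynamic initialization; rejected by
//                            // checkAllowedInitializer below
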
namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                    /*NoWronSidedVars=*/true);
    return Init->isConstantInitializer(S.getASTContext(),
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.getLangOpts().GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace

void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
    if (FD->isDependentContext())
      return;

  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
      if (InitFnTarget != CUDAFunctionTarget::Host &&
          InitFnTarget != CUDAFunctionTarget::HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << llvm::to_underlying(InitFnTarget) << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}
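
// Illustrative sketch (hypothetical user code, not part of this file): which
// global-variable initializers checkAllowedInitializer accepts.
//
//   __device__ int dev();
//   __device__ int a = 42;        // OK: constant initializer
//   __device__ int b = dev();     // error: dynamic initialization of a
//                                 //        device-side global
//   int c = dev();                // error: host-side global initialized by a
//                                 //        __device__ function
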
void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CUDAFunctionTarget::Device &&
      CallerTarget != CUDAFunctionTarget::Global &&
      (CallerTarget != CUDAFunctionTarget::HostDevice ||
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
//   allowed), or
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system header,
//   in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !SemaRef.IsOverload(NewD, OldD,
                               /* UseMemberUsingDeclRules = */ false,
                               /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
}
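
// Illustrative sketch (hypothetical user code, not part of this file): the
// effect of -fcuda-host-device-constexpr, implemented above.
//
//   constexpr int square(int x) { return x * x; }   // becomes implicitly
//                                                   // __host__ __device__
//   __global__ void kern(int *out) { *out = square(3); }  // therefore OK
//
// If a separate __device__ int square(int) had already been declared outside a
// system header, the constexpr declaration above would be diagnosed instead.
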
// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of compilation pipeline if
// `__constant__` memory usage goes beyond limit.
void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
                                                           unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Global:
    case CUDAFunctionTarget::Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
                            Sema::FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                     getASTContext().GetGVALinkageForFunction(Caller) ==
                         GVA_StrongExternal)))
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
                        SemaRef)
      << llvm::to_underlying(IdentifyTarget(Callee)) << /*function*/ 0 << Callee
      << llvm::to_underlying(IdentifyTarget(Caller));
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, SemaRef)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and uses the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on host
  // side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel then
  // is passed to and called by a host function. However that is impossible,
  // since a device function or kernel can only call a device function, and a
  // kernel cannot pass a lambda back to a host function since we cannot
  // define a kernel argument type which can hold the lambda before the lambda
  // itself is defined.
  if (!getLangOpts().CUDAIsDevice)
    return;

  // File-scope lambda can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on device side. For such lambdas, a reference capture is invalid
  // only if the lambda structure is populated by a host function then passed
  // to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, SemaRef)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of this pointer is allowed since this pointer may be pointing to
    // managed memory which is accessible on both device and host sides. It
    // only results in invalid memory access if this pointer points to memory
    // not accessible on device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          SemaRef);
  }
}
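
// Illustrative sketch (hypothetical user code, not part of this file): the
// wrong-sided reference capture diagnosed above, assuming clang's implicit
// host/device treatment of lambdas (see SetLambdaAttrs below).
//
//   template <typename F> __global__ void run(F f) { f(); }
//   void host_fn() {
//     int x = 0;
//     run<<<1, 1>>>([&x] { x++; });          // reference capture of a host
//                                            // stack variable used on device:
//                                            // deferred err_capture_bad_target
//     run<<<1, 1>>>([x] { (void)(x + 1); }); // capture by value: OK
//   }
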
void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
}

void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
      if ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(NewFD) &&
             OldTarget == CUDAFunctionTarget::Device)) ||
          (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(OldFD) &&
             NewTarget == CUDAFunctionTarget::Device)) ||
          (NewTarget == CUDAFunctionTarget::Global) ||
          (OldTarget == CUDAFunctionTarget::Global)) {
        Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
            << llvm::to_underlying(NewTarget) << NewFD->getDeclName()
            << llvm::to_underlying(OldTarget) << OldFD;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
        NewFD->setInvalidDecl();
        break;
      }
      if ((NewTarget == CUDAFunctionTarget::Host &&
           OldTarget == CUDAFunctionTarget::Device) ||
          (NewTarget == CUDAFunctionTarget::Device &&
           OldTarget == CUDAFunctionTarget::Host)) {
        Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
            << llvm::to_underlying(NewTarget) << llvm::to_underlying(OldTarget);
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
      }
    }
  }
}
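
// Illustrative sketch (hypothetical user code, not part of this file): what
// checkTargetOverload accepts and rejects.
//
//   __host__ int f(int);
//   __device__ int f(int);          // accepted as target-based overloads,
//                                   // though the code above may emit
//                                   // warn_offload_incompatible_redeclare
//   __host__ __device__ int g(int);
//   __device__ int g(int);          // error: err_cuda_ovl_target; an HD
//                                   // function may not overload a
//                                   // same-signature device function
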
template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
}

std::string SemaCUDA::getConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call
  return "cudaConfigureCall";
}