Path: blob/main/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
35234 views
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This provides a class for OpenMP runtime code generation.9//10//===----------------------------------------------------------------------===//1112#include "CGOpenMPRuntime.h"13#include "ABIInfoImpl.h"14#include "CGCXXABI.h"15#include "CGCleanup.h"16#include "CGRecordLayout.h"17#include "CodeGenFunction.h"18#include "TargetInfo.h"19#include "clang/AST/APValue.h"20#include "clang/AST/Attr.h"21#include "clang/AST/Decl.h"22#include "clang/AST/OpenMPClause.h"23#include "clang/AST/StmtOpenMP.h"24#include "clang/AST/StmtVisitor.h"25#include "clang/Basic/BitmaskEnum.h"26#include "clang/Basic/FileManager.h"27#include "clang/Basic/OpenMPKinds.h"28#include "clang/Basic/SourceManager.h"29#include "clang/CodeGen/ConstantInitBuilder.h"30#include "llvm/ADT/ArrayRef.h"31#include "llvm/ADT/SetOperations.h"32#include "llvm/ADT/SmallBitVector.h"33#include "llvm/ADT/SmallVector.h"34#include "llvm/ADT/StringExtras.h"35#include "llvm/Bitcode/BitcodeReader.h"36#include "llvm/IR/Constants.h"37#include "llvm/IR/DerivedTypes.h"38#include "llvm/IR/GlobalValue.h"39#include "llvm/IR/InstrTypes.h"40#include "llvm/IR/Value.h"41#include "llvm/Support/AtomicOrdering.h"42#include "llvm/Support/Format.h"43#include "llvm/Support/raw_ostream.h"44#include <cassert>45#include <cstdint>46#include <numeric>47#include <optional>4849using namespace clang;50using namespace CodeGen;51using namespace llvm::omp;5253namespace {54/// Base class for handling code generation inside OpenMP regions.55class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {56public:57/// Kinds of OpenMP regions used in codegen.58enum CGOpenMPRegionKind 
{59/// Region with outlined function for standalone 'parallel'60/// directive.61ParallelOutlinedRegion,62/// Region with outlined function for standalone 'task' directive.63TaskOutlinedRegion,64/// Region for constructs that do not require function outlining,65/// like 'for', 'sections', 'atomic' etc. directives.66InlinedRegion,67/// Region with outlined function for standalone 'target' directive.68TargetRegion,69};7071CGOpenMPRegionInfo(const CapturedStmt &CS,72const CGOpenMPRegionKind RegionKind,73const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,74bool HasCancel)75: CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),76CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}7778CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,79const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,80bool HasCancel)81: CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),82Kind(Kind), HasCancel(HasCancel) {}8384/// Get a variable or parameter for storing global thread id85/// inside OpenMP construct.86virtual const VarDecl *getThreadIDVariable() const = 0;8788/// Emit the captured statement body.89void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;9091/// Get an LValue for the current ThreadID variable.92/// \return LValue for thread id variable. 
This LValue always has type int32*.93virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);9495virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}9697CGOpenMPRegionKind getRegionKind() const { return RegionKind; }9899OpenMPDirectiveKind getDirectiveKind() const { return Kind; }100101bool hasCancel() const { return HasCancel; }102103static bool classof(const CGCapturedStmtInfo *Info) {104return Info->getKind() == CR_OpenMP;105}106107~CGOpenMPRegionInfo() override = default;108109protected:110CGOpenMPRegionKind RegionKind;111RegionCodeGenTy CodeGen;112OpenMPDirectiveKind Kind;113bool HasCancel;114};115116/// API for captured statement code generation in OpenMP constructs.117class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {118public:119CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,120const RegionCodeGenTy &CodeGen,121OpenMPDirectiveKind Kind, bool HasCancel,122StringRef HelperName)123: CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,124HasCancel),125ThreadIDVar(ThreadIDVar), HelperName(HelperName) {126assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");127}128129/// Get a variable or parameter for storing global thread id130/// inside OpenMP construct.131const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }132133/// Get the name of the capture helper.134StringRef getHelperName() const override { return HelperName; }135136static bool classof(const CGCapturedStmtInfo *Info) {137return CGOpenMPRegionInfo::classof(Info) &&138cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==139ParallelOutlinedRegion;140}141142private:143/// A variable or parameter storing global thread id for OpenMP144/// constructs.145const VarDecl *ThreadIDVar;146StringRef HelperName;147};148149/// API for captured statement code generation in OpenMP constructs.150class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {151public:152class UntiedTaskActionTy final : public 
PrePostActionTy {153bool Untied;154const VarDecl *PartIDVar;155const RegionCodeGenTy UntiedCodeGen;156llvm::SwitchInst *UntiedSwitch = nullptr;157158public:159UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,160const RegionCodeGenTy &UntiedCodeGen)161: Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}162void Enter(CodeGenFunction &CGF) override {163if (Untied) {164// Emit task switching point.165LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(166CGF.GetAddrOfLocalVar(PartIDVar),167PartIDVar->getType()->castAs<PointerType>());168llvm::Value *Res =169CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());170llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");171UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);172CGF.EmitBlock(DoneBB);173CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);174CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));175UntiedSwitch->addCase(CGF.Builder.getInt32(0),176CGF.Builder.GetInsertBlock());177emitUntiedSwitch(CGF);178}179}180void emitUntiedSwitch(CodeGenFunction &CGF) const {181if (Untied) {182LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(183CGF.GetAddrOfLocalVar(PartIDVar),184PartIDVar->getType()->castAs<PointerType>());185CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),186PartIdLVal);187UntiedCodeGen(CGF);188CodeGenFunction::JumpDest CurPoint =189CGF.getJumpDestInCurrentScope(".untied.next.");190CGF.EmitBranch(CGF.ReturnBlock.getBlock());191CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));192UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),193CGF.Builder.GetInsertBlock());194CGF.EmitBranchThroughCleanup(CurPoint);195CGF.EmitBlock(CurPoint.getBlock());196}197}198unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }199};200CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,201const VarDecl *ThreadIDVar,202const RegionCodeGenTy &CodeGen,203OpenMPDirectiveKind Kind, bool HasCancel,204const UntiedTaskActionTy &Action)205: 
CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),206ThreadIDVar(ThreadIDVar), Action(Action) {207assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");208}209210/// Get a variable or parameter for storing global thread id211/// inside OpenMP construct.212const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }213214/// Get an LValue for the current ThreadID variable.215LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;216217/// Get the name of the capture helper.218StringRef getHelperName() const override { return ".omp_outlined."; }219220void emitUntiedSwitch(CodeGenFunction &CGF) override {221Action.emitUntiedSwitch(CGF);222}223224static bool classof(const CGCapturedStmtInfo *Info) {225return CGOpenMPRegionInfo::classof(Info) &&226cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==227TaskOutlinedRegion;228}229230private:231/// A variable or parameter storing global thread id for OpenMP232/// constructs.233const VarDecl *ThreadIDVar;234/// Action for emitting code for untied tasks.235const UntiedTaskActionTy &Action;236};237238/// API for inlined captured statement code generation in OpenMP239/// constructs.240class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {241public:242CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,243const RegionCodeGenTy &CodeGen,244OpenMPDirectiveKind Kind, bool HasCancel)245: CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),246OldCSI(OldCSI),247OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}248249// Retrieve the value of the context parameter.250llvm::Value *getContextValue() const override {251if (OuterRegionInfo)252return OuterRegionInfo->getContextValue();253llvm_unreachable("No context value for inlined OpenMP region");254}255256void setContextValue(llvm::Value *V) override {257if (OuterRegionInfo) {258OuterRegionInfo->setContextValue(V);259return;260}261llvm_unreachable("No context value for inlined OpenMP 
region");262}263264/// Lookup the captured field decl for a variable.265const FieldDecl *lookup(const VarDecl *VD) const override {266if (OuterRegionInfo)267return OuterRegionInfo->lookup(VD);268// If there is no outer outlined region,no need to lookup in a list of269// captured variables, we can use the original one.270return nullptr;271}272273FieldDecl *getThisFieldDecl() const override {274if (OuterRegionInfo)275return OuterRegionInfo->getThisFieldDecl();276return nullptr;277}278279/// Get a variable or parameter for storing global thread id280/// inside OpenMP construct.281const VarDecl *getThreadIDVariable() const override {282if (OuterRegionInfo)283return OuterRegionInfo->getThreadIDVariable();284return nullptr;285}286287/// Get an LValue for the current ThreadID variable.288LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {289if (OuterRegionInfo)290return OuterRegionInfo->getThreadIDVariableLValue(CGF);291llvm_unreachable("No LValue for inlined OpenMP construct");292}293294/// Get the name of the capture helper.295StringRef getHelperName() const override {296if (auto *OuterRegionInfo = getOldCSI())297return OuterRegionInfo->getHelperName();298llvm_unreachable("No helper name for inlined OpenMP construct");299}300301void emitUntiedSwitch(CodeGenFunction &CGF) override {302if (OuterRegionInfo)303OuterRegionInfo->emitUntiedSwitch(CGF);304}305306CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }307308static bool classof(const CGCapturedStmtInfo *Info) {309return CGOpenMPRegionInfo::classof(Info) &&310cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;311}312313~CGOpenMPInlinedRegionInfo() override = default;314315private:316/// CodeGen info about outer OpenMP region.317CodeGenFunction::CGCapturedStmtInfo *OldCSI;318CGOpenMPRegionInfo *OuterRegionInfo;319};320321/// API for captured statement code generation in OpenMP target322/// constructs. 
For this captures, implicit parameters are used instead of the323/// captured fields. The name of the target region has to be unique in a given324/// application so it is provided by the client, because only the client has325/// the information to generate that.326class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {327public:328CGOpenMPTargetRegionInfo(const CapturedStmt &CS,329const RegionCodeGenTy &CodeGen, StringRef HelperName)330: CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,331/*HasCancel=*/false),332HelperName(HelperName) {}333334/// This is unused for target regions because each starts executing335/// with a single thread.336const VarDecl *getThreadIDVariable() const override { return nullptr; }337338/// Get the name of the capture helper.339StringRef getHelperName() const override { return HelperName; }340341static bool classof(const CGCapturedStmtInfo *Info) {342return CGOpenMPRegionInfo::classof(Info) &&343cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;344}345346private:347StringRef HelperName;348};349350static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {351llvm_unreachable("No codegen for expressions");352}353/// API for generation of expressions captured in a innermost OpenMP354/// region.355class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {356public:357CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)358: CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,359OMPD_unknown,360/*HasCancel=*/false),361PrivScope(CGF) {362// Make sure the globals captured in the provided statement are local by363// using the privatization logic. 
We assume the same variable is not364// captured more than once.365for (const auto &C : CS.captures()) {366if (!C.capturesVariable() && !C.capturesVariableByCopy())367continue;368369const VarDecl *VD = C.getCapturedVar();370if (VD->isLocalVarDeclOrParm())371continue;372373DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),374/*RefersToEnclosingVariableOrCapture=*/false,375VD->getType().getNonReferenceType(), VK_LValue,376C.getLocation());377PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());378}379(void)PrivScope.Privatize();380}381382/// Lookup the captured field decl for a variable.383const FieldDecl *lookup(const VarDecl *VD) const override {384if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))385return FD;386return nullptr;387}388389/// Emit the captured statement body.390void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {391llvm_unreachable("No body for expressions");392}393394/// Get a variable or parameter for storing global thread id395/// inside OpenMP construct.396const VarDecl *getThreadIDVariable() const override {397llvm_unreachable("No thread id for expressions");398}399400/// Get the name of the capture helper.401StringRef getHelperName() const override {402llvm_unreachable("No helper name for expressions");403}404405static bool classof(const CGCapturedStmtInfo *Info) { return false; }406407private:408/// Private scope to capture global variables.409CodeGenFunction::OMPPrivateScope PrivScope;410};411412/// RAII for emitting code of OpenMP constructs.413class InlinedOpenMPRegionRAII {414CodeGenFunction &CGF;415llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;416FieldDecl *LambdaThisCaptureField = nullptr;417const CodeGen::CGBlockInfo *BlockInfo = nullptr;418bool NoInheritance = false;419420public:421/// Constructs region for combined constructs.422/// \param CodeGen Code generation sequence for combined directives. 
Includes423/// a list of functions used for code generation of implicitly inlined424/// regions.425InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,426OpenMPDirectiveKind Kind, bool HasCancel,427bool NoInheritance = true)428: CGF(CGF), NoInheritance(NoInheritance) {429// Start emission for the construct.430CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(431CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);432if (NoInheritance) {433std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);434LambdaThisCaptureField = CGF.LambdaThisCaptureField;435CGF.LambdaThisCaptureField = nullptr;436BlockInfo = CGF.BlockInfo;437CGF.BlockInfo = nullptr;438}439}440441~InlinedOpenMPRegionRAII() {442// Restore original CapturedStmtInfo only if we're done with code emission.443auto *OldCSI =444cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();445delete CGF.CapturedStmtInfo;446CGF.CapturedStmtInfo = OldCSI;447if (NoInheritance) {448std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);449CGF.LambdaThisCaptureField = LambdaThisCaptureField;450CGF.BlockInfo = BlockInfo;451}452}453};454455/// Values for bit flags used in the ident_t to describe the fields.456/// All enumeric elements are named and described in accordance with the code457/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h458enum OpenMPLocationFlags : unsigned {459/// Use trampoline for internal microtask.460OMP_IDENT_IMD = 0x01,461/// Use c-style ident structure.462OMP_IDENT_KMPC = 0x02,463/// Atomic reduction option for kmpc_reduce.464OMP_ATOMIC_REDUCE = 0x10,465/// Explicit 'barrier' directive.466OMP_IDENT_BARRIER_EXPL = 0x20,467/// Implicit barrier in code.468OMP_IDENT_BARRIER_IMPL = 0x40,469/// Implicit barrier in 'for' directive.470OMP_IDENT_BARRIER_IMPL_FOR = 0x40,471/// Implicit barrier in 'sections' directive.472OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,473/// Implicit barrier in 'single' directive.474OMP_IDENT_BARRIER_IMPL_SINGLE = 
0x140,475/// Call of __kmp_for_static_init for static loop.476OMP_IDENT_WORK_LOOP = 0x200,477/// Call of __kmp_for_static_init for sections.478OMP_IDENT_WORK_SECTIONS = 0x400,479/// Call of __kmp_for_static_init for distribute.480OMP_IDENT_WORK_DISTRIBUTE = 0x800,481LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)482};483484/// Describes ident structure that describes a source location.485/// All descriptions are taken from486/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h487/// Original structure:488/// typedef struct ident {489/// kmp_int32 reserved_1; /**< might be used in Fortran;490/// see above */491/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;492/// KMP_IDENT_KMPC identifies this union493/// member */494/// kmp_int32 reserved_2; /**< not really used in Fortran any more;495/// see above */496///#if USE_ITT_BUILD497/// /* but currently used for storing498/// region-specific ITT */499/// /* contextual information. */500///#endif /* USE_ITT_BUILD */501/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for502/// C++ */503/// char const *psource; /**< String describing the source location.504/// The string is composed of semi-colon separated505// fields which describe the source file,506/// the function and a pair of line numbers that507/// delimit the construct.508/// */509/// } ident_t;510enum IdentFieldIndex {511/// might be used in Fortran512IdentField_Reserved_1,513/// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.514IdentField_Flags,515/// Not really used in Fortran any more516IdentField_Reserved_2,517/// Source[4] in Fortran, do not use for C++518IdentField_Reserved_3,519/// String describing the source location. 
The string is composed of520/// semi-colon separated fields which describe the source file, the function521/// and a pair of line numbers that delimit the construct.522IdentField_PSource523};524525/// Schedule types for 'omp for' loops (these enumerators are taken from526/// the enum sched_type in kmp.h).527enum OpenMPSchedType {528/// Lower bound for default (unordered) versions.529OMP_sch_lower = 32,530OMP_sch_static_chunked = 33,531OMP_sch_static = 34,532OMP_sch_dynamic_chunked = 35,533OMP_sch_guided_chunked = 36,534OMP_sch_runtime = 37,535OMP_sch_auto = 38,536/// static with chunk adjustment (e.g., simd)537OMP_sch_static_balanced_chunked = 45,538/// Lower bound for 'ordered' versions.539OMP_ord_lower = 64,540OMP_ord_static_chunked = 65,541OMP_ord_static = 66,542OMP_ord_dynamic_chunked = 67,543OMP_ord_guided_chunked = 68,544OMP_ord_runtime = 69,545OMP_ord_auto = 70,546OMP_sch_default = OMP_sch_static,547/// dist_schedule types548OMP_dist_sch_static_chunked = 91,549OMP_dist_sch_static = 92,550/// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.551/// Set if the monotonic schedule modifier was present.552OMP_sch_modifier_monotonic = (1 << 29),553/// Set if the nonmonotonic schedule modifier was present.554OMP_sch_modifier_nonmonotonic = (1 << 30),555};556557/// A basic class for pre|post-action for advanced codegen sequence for OpenMP558/// region.559class CleanupTy final : public EHScopeStack::Cleanup {560PrePostActionTy *Action;561562public:563explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}564void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {565if (!CGF.HaveInsertPoint())566return;567Action->Exit(CGF);568}569};570571} // anonymous namespace572573void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {574CodeGenFunction::RunCleanupsScope Scope(CGF);575if (PrePostAction) {576CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);577Callback(CodeGen, CGF, *PrePostAction);578} else 
{579PrePostActionTy Action;580Callback(CodeGen, CGF, Action);581}582}583584/// Check if the combiner is a call to UDR combiner and if it is so return the585/// UDR decl used for reduction.586static const OMPDeclareReductionDecl *587getReductionInit(const Expr *ReductionOp) {588if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))589if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))590if (const auto *DRE =591dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))592if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))593return DRD;594return nullptr;595}596597static void emitInitWithReductionInitializer(CodeGenFunction &CGF,598const OMPDeclareReductionDecl *DRD,599const Expr *InitOp,600Address Private, Address Original,601QualType Ty) {602if (DRD->getInitializer()) {603std::pair<llvm::Function *, llvm::Function *> Reduction =604CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);605const auto *CE = cast<CallExpr>(InitOp);606const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());607const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();608const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();609const auto *LHSDRE =610cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());611const auto *RHSDRE =612cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());613CodeGenFunction::OMPPrivateScope PrivateScope(CGF);614PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);615PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);616(void)PrivateScope.Privatize();617RValue Func = RValue::get(Reduction.second);618CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);619CGF.EmitIgnoredExpr(InitOp);620} else {621llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);622std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});623auto *GV = new llvm::GlobalVariable(624CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,625llvm::GlobalValue::PrivateLinkage, Init, Name);626LValue LV 
= CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);627RValue InitRVal;628switch (CGF.getEvaluationKind(Ty)) {629case TEK_Scalar:630InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());631break;632case TEK_Complex:633InitRVal =634RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));635break;636case TEK_Aggregate: {637OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);638CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);639CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),640/*IsInitializer=*/false);641return;642}643}644OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);645CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);646CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),647/*IsInitializer=*/false);648}649}650651/// Emit initialization of arrays of complex types.652/// \param DestAddr Address of the array.653/// \param Type Type of array.654/// \param Init Initial expression of array.655/// \param SrcAddr Address of the original array.656static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,657QualType Type, bool EmitDeclareReductionInit,658const Expr *Init,659const OMPDeclareReductionDecl *DRD,660Address SrcAddr = Address::invalid()) {661// Perform element-by-element initialization.662QualType ElementTy;663664// Drill down to the base element type on both arrays.665const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();666llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);667if (DRD)668SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());669670llvm::Value *SrcBegin = nullptr;671if (DRD)672SrcBegin = SrcAddr.emitRawPointer(CGF);673llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);674// Cast from pointer to array type to pointer to single element.675llvm::Value *DestEnd =676CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);677// The basic structure here is a while-do loop.678llvm::BasicBlock *BodyBB = 
CGF.createBasicBlock("omp.arrayinit.body");679llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");680llvm::Value *IsEmpty =681CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");682CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);683684// Enter the loop body, making that address the current address.685llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();686CGF.EmitBlock(BodyBB);687688CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);689690llvm::PHINode *SrcElementPHI = nullptr;691Address SrcElementCurrent = Address::invalid();692if (DRD) {693SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,694"omp.arraycpy.srcElementPast");695SrcElementPHI->addIncoming(SrcBegin, EntryBB);696SrcElementCurrent =697Address(SrcElementPHI, SrcAddr.getElementType(),698SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));699}700llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(701DestBegin->getType(), 2, "omp.arraycpy.destElementPast");702DestElementPHI->addIncoming(DestBegin, EntryBB);703Address DestElementCurrent =704Address(DestElementPHI, DestAddr.getElementType(),705DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));706707// Emit copy.708{709CodeGenFunction::RunCleanupsScope InitScope(CGF);710if (EmitDeclareReductionInit) {711emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,712SrcElementCurrent, ElementTy);713} else714CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),715/*IsInitializer=*/false);716}717718if (DRD) {719// Shift the address forward by one element.720llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(721SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,722"omp.arraycpy.dest.element");723SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());724}725726// Shift the address forward by one element.727llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(728DestAddr.getElementType(), DestElementPHI, 
/*Idx0=*/1,729"omp.arraycpy.dest.element");730// Check whether we've reached the end.731llvm::Value *Done =732CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");733CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);734DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());735736// Done.737CGF.EmitBlock(DoneBB, /*IsFinished=*/true);738}739740LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {741return CGF.EmitOMPSharedLValue(E);742}743744LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,745const Expr *E) {746if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))747return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);748return LValue();749}750751void ReductionCodeGen::emitAggregateInitialization(752CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,753const OMPDeclareReductionDecl *DRD) {754// Emit VarDecl with copy init for arrays.755// Get the address of the original variable captured in current756// captured region.757const auto *PrivateVD =758cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());759bool EmitDeclareReductionInit =760DRD && (DRD->getInitializer() || !PrivateVD->hasInit());761EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),762EmitDeclareReductionInit,763EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp764: PrivateVD->getInit(),765DRD, SharedAddr);766}767768ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,769ArrayRef<const Expr *> Origs,770ArrayRef<const Expr *> Privates,771ArrayRef<const Expr *> ReductionOps) {772ClausesData.reserve(Shareds.size());773SharedAddresses.reserve(Shareds.size());774Sizes.reserve(Shareds.size());775BaseDecls.reserve(Shareds.size());776const auto *IOrig = Origs.begin();777const auto *IPriv = Privates.begin();778const auto *IRed = ReductionOps.begin();779for (const Expr *Ref : Shareds) {780ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);781std::advance(IOrig, 1);782std::advance(IPriv, 1);783std::advance(IRed, 1);784}785}786787void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {788assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&789"Number of generated lvalues must be exactly N.");790LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);791LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);792SharedAddresses.emplace_back(First, Second);793if (ClausesData[N].Shared == ClausesData[N].Ref) {794OrigAddresses.emplace_back(First, Second);795} else {796LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);797LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);798OrigAddresses.emplace_back(First, Second);799}800}801802void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {803QualType PrivateType = getPrivateType(N);804bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);805if (!PrivateType->isVariablyModifiedType()) {806Sizes.emplace_back(807CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),808nullptr);809return;810}811llvm::Value *Size;812llvm::Value *SizeInChars;813auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();814auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);815if (AsArraySection) {816Size = 
CGF.Builder.CreatePtrDiff(ElemType,817OrigAddresses[N].second.getPointer(CGF),818OrigAddresses[N].first.getPointer(CGF));819Size = CGF.Builder.CreateNUWAdd(820Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));821SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);822} else {823SizeInChars =824CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());825Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);826}827Sizes.emplace_back(SizeInChars, Size);828CodeGenFunction::OpaqueValueMapping OpaqueMap(829CGF,830cast<OpaqueValueExpr>(831CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),832RValue::get(Size));833CGF.EmitVariablyModifiedType(PrivateType);834}835836void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,837llvm::Value *Size) {838QualType PrivateType = getPrivateType(N);839if (!PrivateType->isVariablyModifiedType()) {840assert(!Size && !Sizes[N].second &&841"Size should be nullptr for non-variably modified reduction "842"items.");843return;844}845CodeGenFunction::OpaqueValueMapping OpaqueMap(846CGF,847cast<OpaqueValueExpr>(848CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),849RValue::get(Size));850CGF.EmitVariablyModifiedType(PrivateType);851}852853void ReductionCodeGen::emitInitialization(854CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,855llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {856assert(SharedAddresses.size() > N && "No variable was generated");857const auto *PrivateVD =858cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());859const OMPDeclareReductionDecl *DRD =860getReductionInit(ClausesData[N].ReductionOp);861if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {862if (DRD && DRD->getInitializer())863(void)DefaultInit(CGF);864emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);865} else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {866(void)DefaultInit(CGF);867QualType 
SharedType = SharedAddresses[N].first.getType();868emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,869PrivateAddr, SharedAddr, SharedType);870} else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&871!CGF.isTrivialInitializer(PrivateVD->getInit())) {872CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,873PrivateVD->getType().getQualifiers(),874/*IsInitializer=*/false);875}876}877878bool ReductionCodeGen::needCleanups(unsigned N) {879QualType PrivateType = getPrivateType(N);880QualType::DestructionKind DTorKind = PrivateType.isDestructedType();881return DTorKind != QualType::DK_none;882}883884void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,885Address PrivateAddr) {886QualType PrivateType = getPrivateType(N);887QualType::DestructionKind DTorKind = PrivateType.isDestructedType();888if (needCleanups(N)) {889PrivateAddr =890PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));891CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);892}893}894895static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,896LValue BaseLV) {897BaseTy = BaseTy.getNonReferenceType();898while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&899!CGF.getContext().hasSameType(BaseTy, ElTy)) {900if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {901BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);902} else {903LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);904BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);905}906BaseTy = BaseTy->getPointeeType();907}908return CGF.MakeAddrLValue(909BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),910BaseLV.getType(), BaseLV.getBaseInfo(),911CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));912}913914static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,915Address OriginalBaseAddress, llvm::Value *Addr) {916RawAddress Tmp = RawAddress::invalid();917Address TopTmp = 
Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

/// Finds the variable at the base of an array section or array subscript
/// expression.
///
/// For an ArraySectionExpr, nested sections and then nested subscripts are
/// stripped; for an ArraySubscriptExpr, nested subscripts are stripped. The
/// remaining base must be a DeclRefExpr to a VarDecl (enforced by cast<>).
///
/// \param Ref Expression to inspect.
/// \param [out] DE Set to the base DeclRefExpr when one is found; left
/// untouched otherwise.
/// \returns the base variable, or nullptr if \p Ref is neither an array
/// section nor an array subscript.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Records the base declaration of reduction item \p N in BaseDecls and
/// returns the address that should stand in for that base variable.
///
/// When the item is an array section/subscript, the private pointer is
/// shifted by the element distance between the beginning of the original base
/// and the shared item, and the result is wrapped by castToBase() so that it
/// has the shape of the original base variable. Otherwise \p PrivateAddr is
/// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    // Adjustment = (begin of base) - (shared item), in elements.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Returns true if reduction item \p N is associated with a declare reduction
/// that has an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Returns the lvalue of the thread id by loading through the thread-id
/// variable, which is expected to have pointer type (castAs<PointerType>).
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emits the body of the region via the stored CodeGen callback, wrapped in a
/// terminate scope. Does nothing if there is no insertion point. The profile
/// counter is incremented only when \p S is non-null.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Returns the lvalue of the thread-id variable itself (no load through a
/// pointer, unlike CGOpenMPRegionInfo::getThreadIDVariableLValue).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),1020getThreadIDVariable()->getType(),1021AlignmentSource::Decl);1022}10231024static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,1025QualType FieldTy) {1026auto *Field = FieldDecl::Create(1027C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,1028C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),1029/*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);1030Field->setAccess(AS_public);1031DC->addDecl(Field);1032return Field;1033}10341035CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)1036: CGM(CGM), OMPBuilder(CGM.getModule()) {1037KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);1038llvm::OpenMPIRBuilderConfig Config(1039CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),1040CGM.getLangOpts().OpenMPOffloadMandatory,1041/*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,1042hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);1043OMPBuilder.initialize();1044OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice1045? 
CGM.getLangOpts().OMPHostIRFile1046: StringRef{});1047OMPBuilder.setConfig(Config);10481049// The user forces the compiler to behave as if omp requires1050// unified_shared_memory was given.1051if (CGM.getLangOpts().OpenMPForceUSM) {1052HasRequiresUnifiedSharedMemory = true;1053OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);1054}1055}10561057void CGOpenMPRuntime::clear() {1058InternalVars.clear();1059// Clean non-target variable declarations possibly used only in debug info.1060for (const auto &Data : EmittedNonTargetVariables) {1061if (!Data.getValue().pointsToAliveValue())1062continue;1063auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());1064if (!GV)1065continue;1066if (!GV->isDeclaration() || GV->getNumUses() > 0)1067continue;1068GV->eraseFromParent();1069}1070}10711072std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {1073return OMPBuilder.createPlatformSpecificName(Parts);1074}10751076static llvm::Function *1077emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,1078const Expr *CombinerInitializer, const VarDecl *In,1079const VarDecl *Out, bool IsCombiner) {1080// void .omp_combiner.(Ty *in, Ty *out);1081ASTContext &C = CGM.getContext();1082QualType PtrTy = C.getPointerType(Ty).withRestrict();1083FunctionArgList Args;1084ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),1085/*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);1086ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),1087/*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);1088Args.push_back(&OmpOutParm);1089Args.push_back(&OmpInParm);1090const CGFunctionInfo &FnInfo =1091CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);1092llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);1093std::string Name = CGM.getOpenMPRuntime().getName(1094{IsCombiner ? 
"omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner and, if \p D has an initializer, the initializer helper
/// for the declare reduction \p D and caches both in UDRMap. Does nothing if
/// \p D is already cached.
///
/// The initializer expression is passed to the helper only for call-style
/// initializers; for other kinds the helper relies on the initializer of the
/// private variable.
///
/// \param CGF If non-null, \p D is also registered under CGF->CurFn in
/// FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Returns the (combiner, initializer) pair for \p D, emitting the helpers
/// first if they are not cached yet. The initializer is null when \p D has
/// none.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Pushes a finalization callback for a region of kind \p Kind onto
  /// \p OMPBuilder; a null \p OMPBuilder makes this object a no-op.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  /// Pops the finalization callback pushed by the constructor, if any.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Generates the outlined function for the captured statement \p CS of a
/// parallel or teams region of directive \p D.
///
/// \param ThreadIDVar Thread-id variable of the outlined function; must have
/// pointer type.
/// \param InnermostKind Kind of the innermost directive, forwarded to the
/// region info and the finalization callback.
/// \param OutlinedHelperName Name passed to the outlined region info.
/// \param CodeGen Callback that emits the region body.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Query hasCancel() on whichever concrete directive class D is.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel =
OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Returns \p Name followed by the platform-specific "omp_outlined" suffix.
std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

/// Returns the outlined helper name derived from the name of the function
/// currently being emitted by \p CGF.
std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

/// Returns \p Name followed by the platform-specific name built from the
/// parts "omp", "reduction" and "reduction_func".
std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

/// Emits the outlined function for the OMPD_parallel captured statement of
/// directive \p D, named after the function currently emitted by \p CGF.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

/// Emits the outlined function for the OMPD_teams captured statement of
/// directive \p D, named after the function currently emitted by \p CGF.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective
&D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

/// Emits the outlined function for a task or taskloop directive \p D.
///
/// \param ThreadIDVar Thread-id variable; must NOT have pointer type for
/// tasks.
/// \param PartIDVar Part-id variable handed to the untied-task action.
/// \param TaskTVar Variable holding the pointer to the task descriptor; used
/// to re-enqueue the task via __kmpc_omp_task from the untied-task callback.
/// \param Tied Whether the task is tied.
/// \param [out] NumberOfParts Written only when \p Tied is false; receives the
/// number of parts reported by the untied-task action.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback that emits a call to __kmpc_omp_task(loc, tid, task).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Query hasCancel() on whichever concrete task directive class D is.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Creates the placeholder "svcpt" instruction that marks where service code
/// (such as the thread-id call emitted by getThreadID) is inserted for the
/// current function. The insert point must not be set already.
///
/// \param AtCurrentPoint If true, the placeholder is appended to the current
/// insertion block; otherwise it is placed right after CGF.AllocaInsertPt.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erases the service insert-point placeholder of the current function, if
/// one exists, and resets the stored pointer.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Formats \p Loc as ";file;function;line;column;;" into \p Buffer and
/// returns a reference to the buffer contents. The function part is the
/// qualified name of the current function declaration and is empty when
/// CGF.CurFuncDecl is not a FunctionDecl.
// NOTE(review): the presumed location is used without a validity check;
// presumably callers always pass a valid Loc here - confirm.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Returns the ident_t value describing \p Loc with the given \p Flags.
///
/// The default source-location string is used when \p Loc is invalid, or when
/// \p EmitLoc is false and no debug info is requested; otherwise the string
/// is built from the current function name and the presumed file, line and
/// column of \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

/// Returns the value of the global thread id for use at \p Loc in the
/// function currently being emitted by \p CGF.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return
OMPBuilder.getOrCreateThreadID(1404OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));1405}14061407llvm::Value *ThreadID = nullptr;1408// Check whether we've already cached a load of the thread id in this1409// function.1410auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);1411if (I != OpenMPLocThreadIDMap.end()) {1412ThreadID = I->second.ThreadID;1413if (ThreadID != nullptr)1414return ThreadID;1415}1416// If exceptions are enabled, do not use parameter to avoid possible crash.1417if (auto *OMPRegionInfo =1418dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {1419if (OMPRegionInfo->getThreadIDVariable()) {1420// Check if this an outlined function with thread id passed as argument.1421LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);1422llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();1423if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||1424!CGF.getLangOpts().CXXExceptions ||1425CGF.Builder.GetInsertBlock() == TopBlock ||1426!isa<llvm::Instruction>(LVal.getPointer(CGF)) ||1427cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==1428TopBlock ||1429cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==1430CGF.Builder.GetInsertBlock()) {1431ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);1432// If value loaded in entry block, cache it and use it everywhere in1433// function.1434if (CGF.Builder.GetInsertBlock() == TopBlock) {1435auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);1436Elem.second.ThreadID = ThreadID;1437}1438return ThreadID;1439}1440}1441}14421443// This is not an outlined function region - need to call __kmpc_int321444// kmpc_global_thread_num(ident_t *loc).1445// Generate thread id value and cache this value for use across the1446// function.1447auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);1448if (!Elem.second.ServiceInsertPt)1449setLocThreadIdInsertPt(CGF);1450CGBuilderTy::InsertPointGuard 
IPG(CGF.Builder);1451CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);1452auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);1453llvm::CallInst *Call = CGF.Builder.CreateCall(1454OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),1455OMPRTL___kmpc_global_thread_num),1456emitUpdateLocation(CGF, Loc));1457Call->setCallingConv(CGF.getRuntimeCC());1458Elem.second.ThreadID = Call;1459return Call;1460}14611462void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {1463assert(CGF.CurFn && "No function in current CodeGenFunction.");1464if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {1465clearLocThreadIdInsertPt(CGF);1466OpenMPLocThreadIDMap.erase(CGF.CurFn);1467}1468if (FunctionUDRMap.count(CGF.CurFn) > 0) {1469for(const auto *D : FunctionUDRMap[CGF.CurFn])1470UDRMap.erase(D);1471FunctionUDRMap.erase(CGF.CurFn);1472}1473auto I = FunctionUDMMap.find(CGF.CurFn);1474if (I != FunctionUDMMap.end()) {1475for(const auto *D : I->second)1476UDMMap.erase(D);1477FunctionUDMMap.erase(I);1478}1479LastprivateConditionalToTypes.erase(CGF.CurFn);1480FunctionToUntiedTaskStackMap.erase(CGF.CurFn);1481}14821483llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {1484return OMPBuilder.IdentPtr;1485}14861487llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {1488if (!Kmpc_MicroTy) {1489// Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)1490llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),1491llvm::PointerType::getUnqual(CGM.Int32Ty)};1492Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);1493}1494return llvm::PointerType::getUnqual(Kmpc_MicroTy);1495}14961497llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind1498convertDeviceClause(const VarDecl *VD) {1499std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =1500OMPDeclareTargetDeclAttr::getDeviceType(VD);1501if (!DevTy)1502return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;15031504switch ((int)*DevTy) { // Avoid 
-Wcovered-switch-default1505case OMPDeclareTargetDeclAttr::DT_Host:1506return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;1507break;1508case OMPDeclareTargetDeclAttr::DT_NoHost:1509return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;1510break;1511case OMPDeclareTargetDeclAttr::DT_Any:1512return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;1513break;1514default:1515return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;1516break;1517}1518}15191520llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind1521convertCaptureClause(const VarDecl *VD) {1522std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =1523OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);1524if (!MapType)1525return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;1526switch ((int)*MapType) { // Avoid -Wcovered-switch-default1527case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:1528return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;1529break;1530case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:1531return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;1532break;1533case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:1534return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;1535break;1536default:1537return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;1538break;1539}1540}15411542static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(1543CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,1544SourceLocation BeginLoc, llvm::StringRef ParentName = "") {15451546auto FileInfoCallBack = [&]() {1547SourceManager &SM = CGM.getContext().getSourceManager();1548PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);15491550llvm::sys::fs::UniqueID ID;1551if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {1552PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);1553}15541555return std::pair<std::string, uint64_t>(PLoc.getFilename(), 
PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

/// Asks the OpenMPIRBuilder for the address of the declare-target variable
/// \p VD, passing the capture/device clause kinds, entry info, mangled name
/// and callbacks that lazily produce the global's address and linkage.
///
/// \returns an invalid ConstantAddress if the builder returns null; otherwise
/// the returned constant, typed as the memory type of "pointer to VD's type"
/// and aligned as \p VD.
ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

/// Returns the internal cache variable used for the threadprivate variable
/// \p VD, creating it on first request. Its name is the mangled name of
/// \p VD followed by the platform-specific "cache" suffix. Must only be used
/// when TLS is not both requested and supported (see the assertion).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

/// Returns the address of the calling thread's copy of the threadprivate
/// variable \p VD whose original address is \p VDAddr. When TLS is requested
/// and supported, \p VDAddr is returned as is; otherwise the address is
/// obtained from a __kmpc_threadprivate_cached runtime call.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF,
Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

/// Emits the runtime calls that register the constructor, copy constructor
/// and destructor of the threadprivate variable at \p VDAddr:
/// __kmpc_global_thread_num(&loc) followed by
/// __kmpc_threadprivate_register(&loc, &var, ctor, cctor, dtor).
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emits, at most once per variable definition, the constructor/destructor
/// helpers for the threadprivate variable \p VD and the code registering them
/// with the runtime.
///
/// \param VDAddr Address of the original variable.
/// \param PerformInit Whether a constructor helper re-emitting the
/// initializer should be generated (C++ only).
/// \param CGF If non-null, the registration is emitted into this function and
/// nullptr is returned; if null, a standalone init function is created and
/// returned.
/// \returns the standalone init function, or nullptr when TLS is used, when
/// the definition was already handled or is missing, when neither a
/// constructor nor a destructor is needed, or when \p CGF was provided.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert() result ensures each definition is processed only once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit implies an initializer exists - confirm against
    // callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns its destination pointer argument.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Substitute typed null pointers for whichever helper was not generated.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration in its own
      // nullary init function and hand that back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Registers an 'indirect' declare-target function \p FD (emitted as \p GV)
/// as a device global variable entry of kind OMPTargetGlobalVarEntryIndirect.
/// Does nothing unless \p FD has an active declare-target attribute with the
/// indirect flag set. When compiling for the target device, a new constant
/// global holding the function's address is created and registered instead
/// of \p GV itself.
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return
Address(1819CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(1820CGF.EmitRuntimeCall(1821OMPBuilder.getOrCreateRuntimeFunction(1822CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),1823Args),1824VarLVType->getPointerTo(/*AddrSpace=*/0)),1825VarLVType, CGM.getContext().getTypeAlignInChars(VarType));1826}18271828void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,1829const RegionCodeGenTy &ThenGen,1830const RegionCodeGenTy &ElseGen) {1831CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());18321833// If the condition constant folds and can be elided, try to avoid emitting1834// the condition and the dead arm of the if/else.1835bool CondConstant;1836if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {1837if (CondConstant)1838ThenGen(CGF);1839else1840ElseGen(CGF);1841return;1842}18431844// Otherwise, the condition did not fold, or we couldn't elide it. Just1845// emit the conditional branch.1846llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");1847llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");1848llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");1849CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);18501851// Emit the 'then' code.1852CGF.EmitBlock(ThenBlock);1853ThenGen(CGF);1854CGF.EmitBranch(ContBlock);1855// Emit the 'else' code if present.1856// There is no need to emit line number for unconditional branch.1857(void)ApplyDebugLocation::CreateEmpty(CGF);1858CGF.EmitBlock(ElseBlock);1859ElseGen(CGF);1860// There is no need to emit line number for unconditional branch.1861(void)ApplyDebugLocation::CreateEmpty(CGF);1862CGF.EmitBranch(ContBlock);1863// Emit the continuation block for code after the if.1864CGF.EmitBlock(ContBlock, /*IsFinished=*/true);1865}18661867void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,1868llvm::Function *OutlinedFn,1869ArrayRef<llvm::Value *> CapturedVars,1870const Expr 
*IfCond,1871llvm::Value *NumThreads) {1872if (!CGF.HaveInsertPoint())1873return;1874llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);1875auto &M = CGM.getModule();1876auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,1877this](CodeGenFunction &CGF, PrePostActionTy &) {1878// Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);1879CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();1880llvm::Value *Args[] = {1881RTLoc,1882CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars1883CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};1884llvm::SmallVector<llvm::Value *, 16> RealArgs;1885RealArgs.append(std::begin(Args), std::end(Args));1886RealArgs.append(CapturedVars.begin(), CapturedVars.end());18871888llvm::FunctionCallee RTLFn =1889OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);1890CGF.EmitRuntimeCall(RTLFn, RealArgs);1891};1892auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,1893this](CodeGenFunction &CGF, PrePostActionTy &) {1894CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();1895llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);1896// Build calls:1897// __kmpc_serialized_parallel(&Loc, GTid);1898llvm::Value *Args[] = {RTLoc, ThreadID};1899CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(1900M, OMPRTL___kmpc_serialized_parallel),1901Args);19021903// OutlinedFn(>id, &zero_bound, CapturedStruct);1904Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);1905RawAddress ZeroAddrBound =1906CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,1907/*Name=*/".bound.zero.addr");1908CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);1909llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;1910// ThreadId for serialized parallels is 0.1911OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));1912OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());1913OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());19141915// Ensure we do not inline the function. 
This is trivially true for the ones1916// passed to __kmpc_fork_call but the ones called in serialized regions1917// could be inlined. This is not a perfect but it is closer to the invariant1918// we want, namely, every data environment starts with a new function.1919// TODO: We should pass the if condition to the runtime function and do the1920// handling there. Much cleaner code.1921OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);1922OutlinedFn->addFnAttr(llvm::Attribute::NoInline);1923RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);19241925// __kmpc_end_serialized_parallel(&Loc, GTid);1926llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};1927CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(1928M, OMPRTL___kmpc_end_serialized_parallel),1929EndArgs);1930};1931if (IfCond) {1932emitIfClause(CGF, IfCond, ThenGen, ElseGen);1933} else {1934RegionCodeGenTy ThenRCG(ThenGen);1935ThenRCG(CGF);1936}1937}19381939// If we're inside an (outlined) parallel region, use the region info's1940// thread-ID variable (it is passed in a first argument of the outlined function1941// as "kmp_int32 *gtid"). 
Otherwise, if we're not inside parallel region, but in1942// regular serial code region, get thread ID by calling kmp_int321943// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and1944// return the address of that temp.1945Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,1946SourceLocation Loc) {1947if (auto *OMPRegionInfo =1948dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))1949if (OMPRegionInfo->getThreadIDVariable())1950return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();19511952llvm::Value *ThreadID = getThreadID(CGF, Loc);1953QualType Int32Ty =1954CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);1955Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");1956CGF.EmitStoreOfScalar(ThreadID,1957CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));19581959return ThreadIDTemp;1960}19611962llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {1963std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();1964std::string Name = getName({Prefix, "var"});1965return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);1966}19671968namespace {1969/// Common pre(post)-action for different OpenMP constructs.1970class CommonActionTy final : public PrePostActionTy {1971llvm::FunctionCallee EnterCallee;1972ArrayRef<llvm::Value *> EnterArgs;1973llvm::FunctionCallee ExitCallee;1974ArrayRef<llvm::Value *> ExitArgs;1975bool Conditional;1976llvm::BasicBlock *ContBlock = nullptr;19771978public:1979CommonActionTy(llvm::FunctionCallee EnterCallee,1980ArrayRef<llvm::Value *> EnterArgs,1981llvm::FunctionCallee ExitCallee,1982ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)1983: EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),1984ExitArgs(ExitArgs), Conditional(Conditional) {}1985void Enter(CodeGenFunction &CGF) override {1986llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);1987if 
(Conditional) {1988llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);1989auto *ThenBlock = CGF.createBasicBlock("omp_if.then");1990ContBlock = CGF.createBasicBlock("omp_if.end");1991// Generate the branch (If-stmt)1992CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);1993CGF.EmitBlock(ThenBlock);1994}1995}1996void Done(CodeGenFunction &CGF) {1997// Emit the rest of blocks/branches1998CGF.EmitBranch(ContBlock);1999CGF.EmitBlock(ContBlock, true);2000}2001void Exit(CodeGenFunction &CGF) override {2002CGF.EmitRuntimeCall(ExitCallee, ExitArgs);2003}2004};2005} // anonymous namespace20062007void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,2008StringRef CriticalName,2009const RegionCodeGenTy &CriticalOpGen,2010SourceLocation Loc, const Expr *Hint) {2011// __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);2012// CriticalOpGen();2013// __kmpc_end_critical(ident_t *, gtid, Lock);2014// Prepare arguments and build a call to __kmpc_critical2015if (!CGF.HaveInsertPoint())2016return;2017llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),2018getCriticalRegionLock(CriticalName)};2019llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),2020std::end(Args));2021if (Hint) {2022EnterArgs.push_back(CGF.Builder.CreateIntCast(2023CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));2024}2025CommonActionTy Action(2026OMPBuilder.getOrCreateRuntimeFunction(2027CGM.getModule(),2028Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),2029EnterArgs,2030OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),2031OMPRTL___kmpc_end_critical),2032Args);2033CriticalOpGen.setAction(Action);2034emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);2035}20362037void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,2038const RegionCodeGenTy &MasterOpGen,2039SourceLocation Loc) {2040if (!CGF.HaveInsertPoint())2041return;2042// if(__kmpc_master(ident_t *, gtid)) {2043// MasterOpGen();2044// __kmpc_end_master(ident_t *, gtid);2045// }2046// Prepare arguments and build a call to __kmpc_master2047llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};2048CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(2049CGM.getModule(), OMPRTL___kmpc_master),2050Args,2051OMPBuilder.getOrCreateRuntimeFunction(2052CGM.getModule(), OMPRTL___kmpc_end_master),2053Args,2054/*Conditional=*/true);2055MasterOpGen.setAction(Action);2056emitInlinedDirective(CGF, OMPD_master, MasterOpGen);2057Action.Done(CGF);2058}20592060void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,2061const RegionCodeGenTy &MaskedOpGen,2062SourceLocation Loc, const Expr *Filter) {2063if (!CGF.HaveInsertPoint())2064return;2065// if(__kmpc_masked(ident_t *, gtid, filter)) {2066// MaskedOpGen();2067// __kmpc_end_masked(iden_t *, gtid);2068// }2069// Prepare arguments and build a call to __kmpc_masked2070llvm::Value *FilterVal = Filter2071? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty)2072: llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);2073llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),2074FilterVal};2075llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),2076getThreadID(CGF, Loc)};2077CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(2078CGM.getModule(), OMPRTL___kmpc_masked),2079Args,2080OMPBuilder.getOrCreateRuntimeFunction(2081CGM.getModule(), OMPRTL___kmpc_end_masked),2082ArgsEnd,2083/*Conditional=*/true);2084MaskedOpGen.setAction(Action);2085emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);2086Action.Done(CGF);2087}20882089void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,2090SourceLocation Loc) {2091if (!CGF.HaveInsertPoint())2092return;2093if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {2094OMPBuilder.createTaskyield(CGF.Builder);2095} else {2096// Build call __kmpc_omp_taskyield(loc, thread_id, 0);2097llvm::Value *Args[] = {2098emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),2099llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};2100CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(2101CGM.getModule(), OMPRTL___kmpc_omp_taskyield),2102Args);2103}21042105if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))2106Region->emitUntiedSwitch(CGF);2107}21082109void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,2110const RegionCodeGenTy &TaskgroupOpGen,2111SourceLocation Loc) {2112if (!CGF.HaveInsertPoint())2113return;2114// __kmpc_taskgroup(ident_t *, gtid);2115// TaskgroupOpGen();2116// __kmpc_end_taskgroup(ident_t *, gtid);2117// Prepare arguments and build a call to __kmpc_taskgroup2118llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};2119CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(2120CGM.getModule(), OMPRTL___kmpc_taskgroup),2121Args,2122OMPBuilder.getOrCreateRuntimeFunction(2123CGM.getModule(), 
OMPRTL___kmpc_end_taskgroup),2124Args);2125TaskgroupOpGen.setAction(Action);2126emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);2127}21282129/// Given an array of pointers to variables, project the address of a2130/// given variable.2131static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,2132unsigned Index, const VarDecl *Var) {2133// Pull out the pointer to the variable.2134Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);2135llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);21362137llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());2138return Address(2139CGF.Builder.CreateBitCast(2140Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),2141ElemTy, CGF.getContext().getDeclAlign(Var));2142}21432144static llvm::Value *emitCopyprivateCopyFunction(2145CodeGenModule &CGM, llvm::Type *ArgsElemType,2146ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,2147ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,2148SourceLocation Loc) {2149ASTContext &C = CGM.getContext();2150// void copy_func(void *LHSArg, void *RHSArg);2151FunctionArgList Args;2152ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,2153ImplicitParamKind::Other);2154ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,2155ImplicitParamKind::Other);2156Args.push_back(&LHSArg);2157Args.push_back(&RHSArg);2158const auto &CGFI =2159CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);2160std::string Name =2161CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});2162auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),2163llvm::GlobalValue::InternalLinkage, Name,2164&CGM.getModule());2165CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);2166Fn->setDoesNotRecurse();2167CodeGenFunction CGF(CGM);2168CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);2169// Dest = (void*[n])(LHSArg);2170// 
Src = (void*[n])(RHSArg);2171Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(2172CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),2173ArgsElemType->getPointerTo()),2174ArgsElemType, CGF.getPointerAlign());2175Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(2176CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),2177ArgsElemType->getPointerTo()),2178ArgsElemType, CGF.getPointerAlign());2179// *(Type0*)Dst[0] = *(Type0*)Src[0];2180// *(Type1*)Dst[1] = *(Type1*)Src[1];2181// ...2182// *(Typen*)Dst[n] = *(Typen*)Src[n];2183for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {2184const auto *DestVar =2185cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());2186Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);21872188const auto *SrcVar =2189cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());2190Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);21912192const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();2193QualType Type = VD->getType();2194CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);2195}2196CGF.FinishFunction();2197return Fn;2198}21992200void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,2201const RegionCodeGenTy &SingleOpGen,2202SourceLocation Loc,2203ArrayRef<const Expr *> CopyprivateVars,2204ArrayRef<const Expr *> SrcExprs,2205ArrayRef<const Expr *> DstExprs,2206ArrayRef<const Expr *> AssignmentOps) {2207if (!CGF.HaveInsertPoint())2208return;2209assert(CopyprivateVars.size() == SrcExprs.size() &&2210CopyprivateVars.size() == DstExprs.size() &&2211CopyprivateVars.size() == AssignmentOps.size());2212ASTContext &C = CGM.getContext();2213// int32 did_it = 0;2214// if(__kmpc_single(ident_t *, gtid)) {2215// SingleOpGen();2216// __kmpc_end_single(ident_t *, gtid);2217// did_it = 1;2218// }2219// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,2220// <copy_func>, did_it);22212222Address DidIt = Address::invalid();2223if 
(!CopyprivateVars.empty()) {2224// int32 did_it = 0;2225QualType KmpInt32Ty =2226C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);2227DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");2228CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);2229}2230// Prepare arguments and build a call to __kmpc_single2231llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};2232CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(2233CGM.getModule(), OMPRTL___kmpc_single),2234Args,2235OMPBuilder.getOrCreateRuntimeFunction(2236CGM.getModule(), OMPRTL___kmpc_end_single),2237Args,2238/*Conditional=*/true);2239SingleOpGen.setAction(Action);2240emitInlinedDirective(CGF, OMPD_single, SingleOpGen);2241if (DidIt.isValid()) {2242// did_it = 1;2243CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);2244}2245Action.Done(CGF);2246// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,2247// <copy_func>, did_it);2248if (DidIt.isValid()) {2249llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());2250QualType CopyprivateArrayTy = C.getConstantArrayType(2251C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,2252/*IndexTypeQuals=*/0);2253// Create a list of all private variables for copyprivate.2254Address CopyprivateList =2255CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");2256for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {2257Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);2258CGF.Builder.CreateStore(2259CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(2260CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),2261CGF.VoidPtrTy),2262Elem);2263}2264// Build function that copies private values from single region to all other2265// threads in the corresponding parallel region.2266llvm::Value *CpyFn = emitCopyprivateCopyFunction(2267CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,2268SrcExprs, DstExprs, AssignmentOps, 
Loc);2269llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);2270Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(2271CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);2272llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);2273llvm::Value *Args[] = {2274emitUpdateLocation(CGF, Loc), // ident_t *<loc>2275getThreadID(CGF, Loc), // i32 <gtid>2276BufSize, // size_t <buf_size>2277CL.emitRawPointer(CGF), // void *<copyprivate list>2278CpyFn, // void (*) (void *, void *) <copy_func>2279DidItVal // i32 did_it2280};2281CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(2282CGM.getModule(), OMPRTL___kmpc_copyprivate),2283Args);2284}2285}22862287void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,2288const RegionCodeGenTy &OrderedOpGen,2289SourceLocation Loc, bool IsThreads) {2290if (!CGF.HaveInsertPoint())2291return;2292// __kmpc_ordered(ident_t *, gtid);2293// OrderedOpGen();2294// __kmpc_end_ordered(ident_t *, gtid);2295// Prepare arguments and build a call to __kmpc_ordered2296if (IsThreads) {2297llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};2298CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(2299CGM.getModule(), OMPRTL___kmpc_ordered),2300Args,2301OMPBuilder.getOrCreateRuntimeFunction(2302CGM.getModule(), OMPRTL___kmpc_end_ordered),2303Args);2304OrderedOpGen.setAction(Action);2305emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);2306return;2307}2308emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);2309}23102311unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {2312unsigned Flags;2313if (Kind == OMPD_for)2314Flags = OMP_IDENT_BARRIER_IMPL_FOR;2315else if (Kind == OMPD_sections)2316Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;2317else if (Kind == OMPD_single)2318Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;2319else if (Kind == OMPD_barrier)2320Flags = OMP_IDENT_BARRIER_EXPL;2321else2322Flags = OMP_IDENT_BARRIER_IMPL;2323return Flags;2324}23252326void 
CGOpenMPRuntime::getDefaultScheduleAndChunk(2327CodeGenFunction &CGF, const OMPLoopDirective &S,2328OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {2329// Check if the loop directive is actually a doacross loop directive. In this2330// case choose static, 1 schedule.2331if (llvm::any_of(2332S.getClausesOfKind<OMPOrderedClause>(),2333[](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {2334ScheduleKind = OMPC_SCHEDULE_static;2335// Chunk size is 1 in this case.2336llvm::APInt ChunkSize(32, 1);2337ChunkExpr = IntegerLiteral::Create(2338CGF.getContext(), ChunkSize,2339CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),2340SourceLocation());2341}2342}23432344void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,2345OpenMPDirectiveKind Kind, bool EmitChecks,2346bool ForceSimpleCall) {2347// Check if we should use the OMPBuilder2348auto *OMPRegionInfo =2349dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);2350if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {2351CGF.Builder.restoreIP(OMPBuilder.createBarrier(2352CGF.Builder, Kind, ForceSimpleCall, EmitChecks));2353return;2354}23552356if (!CGF.HaveInsertPoint())2357return;2358// Build call __kmpc_cancel_barrier(loc, thread_id);2359// Build call __kmpc_barrier(loc, thread_id);2360unsigned Flags = getDefaultFlagsForBarriers(Kind);2361// Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,2362// thread_id);2363llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),2364getThreadID(CGF, Loc)};2365if (OMPRegionInfo) {2366if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {2367llvm::Value *Result = CGF.EmitRuntimeCall(2368OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),2369OMPRTL___kmpc_cancel_barrier),2370Args);2371if (EmitChecks) {2372// if (__kmpc_cancel_barrier()) {2373// exit from construct;2374// }2375llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");2376llvm::BasicBlock *ContBB = 
CGF.createBasicBlock(".cancel.continue");2377llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);2378CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);2379CGF.EmitBlock(ExitBB);2380// exit from construct;2381CodeGenFunction::JumpDest CancelDestination =2382CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());2383CGF.EmitBranchThroughCleanup(CancelDestination);2384CGF.EmitBlock(ContBB, /*IsFinished=*/true);2385}2386return;2387}2388}2389CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(2390CGM.getModule(), OMPRTL___kmpc_barrier),2391Args);2392}23932394void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,2395Expr *ME, bool IsFatal) {2396llvm::Value *MVL =2397ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)2398: llvm::ConstantPointerNull::get(CGF.VoidPtrTy);2399// Build call void __kmpc_error(ident_t *loc, int severity, const char2400// *message)2401llvm::Value *Args[] = {2402emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),2403llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),2404CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};2405CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(2406CGM.getModule(), OMPRTL___kmpc_error),2407Args);2408}24092410/// Map the OpenMP loop schedule to the runtime enumeration.2411static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,2412bool Chunked, bool Ordered) {2413switch (ScheduleKind) {2414case OMPC_SCHEDULE_static:2415return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)2416: (Ordered ? OMP_ord_static : OMP_sch_static);2417case OMPC_SCHEDULE_dynamic:2418return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;2419case OMPC_SCHEDULE_guided:2420return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;2421case OMPC_SCHEDULE_runtime:2422return Ordered ? OMP_ord_runtime : OMP_sch_runtime;2423case OMPC_SCHEDULE_auto:2424return Ordered ? 
OMP_ord_auto : OMP_sch_auto;2425case OMPC_SCHEDULE_unknown:2426assert(!Chunked && "chunk was specified but schedule kind not known");2427return Ordered ? OMP_ord_static : OMP_sch_static;2428}2429llvm_unreachable("Unexpected runtime schedule");2430}24312432/// Map the OpenMP distribute schedule to the runtime enumeration.2433static OpenMPSchedType2434getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {2435// only static is allowed for dist_schedule2436return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;2437}24382439bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,2440bool Chunked) const {2441OpenMPSchedType Schedule =2442getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);2443return Schedule == OMP_sch_static;2444}24452446bool CGOpenMPRuntime::isStaticNonchunked(2447OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {2448OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);2449return Schedule == OMP_dist_sch_static;2450}24512452bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,2453bool Chunked) const {2454OpenMPSchedType Schedule =2455getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);2456return Schedule == OMP_sch_static_chunked;2457}24582459bool CGOpenMPRuntime::isStaticChunked(2460OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {2461OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);2462return Schedule == OMP_dist_sch_static_chunked;2463}24642465bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {2466OpenMPSchedType Schedule =2467getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);2468assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");2469return Schedule != OMP_sch_static;2470}24712472static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,2473OpenMPScheduleClauseModifier 
M1,2474OpenMPScheduleClauseModifier M2) {2475int Modifier = 0;2476switch (M1) {2477case OMPC_SCHEDULE_MODIFIER_monotonic:2478Modifier = OMP_sch_modifier_monotonic;2479break;2480case OMPC_SCHEDULE_MODIFIER_nonmonotonic:2481Modifier = OMP_sch_modifier_nonmonotonic;2482break;2483case OMPC_SCHEDULE_MODIFIER_simd:2484if (Schedule == OMP_sch_static_chunked)2485Schedule = OMP_sch_static_balanced_chunked;2486break;2487case OMPC_SCHEDULE_MODIFIER_last:2488case OMPC_SCHEDULE_MODIFIER_unknown:2489break;2490}2491switch (M2) {2492case OMPC_SCHEDULE_MODIFIER_monotonic:2493Modifier = OMP_sch_modifier_monotonic;2494break;2495case OMPC_SCHEDULE_MODIFIER_nonmonotonic:2496Modifier = OMP_sch_modifier_nonmonotonic;2497break;2498case OMPC_SCHEDULE_MODIFIER_simd:2499if (Schedule == OMP_sch_static_chunked)2500Schedule = OMP_sch_static_balanced_chunked;2501break;2502case OMPC_SCHEDULE_MODIFIER_last:2503case OMPC_SCHEDULE_MODIFIER_unknown:2504break;2505}2506// OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.2507// If the static schedule kind is specified or if the ordered clause is2508// specified, and if the nonmonotonic modifier is not specified, the effect is2509// as if the monotonic modifier is specified. 
Otherwise, unless the monotonic2510// modifier is specified, the effect is as if the nonmonotonic modifier is2511// specified.2512if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {2513if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||2514Schedule == OMP_sch_static_balanced_chunked ||2515Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||2516Schedule == OMP_dist_sch_static_chunked ||2517Schedule == OMP_dist_sch_static))2518Modifier = OMP_sch_modifier_nonmonotonic;2519}2520return Schedule | Modifier;2521}25222523void CGOpenMPRuntime::emitForDispatchInit(2524CodeGenFunction &CGF, SourceLocation Loc,2525const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,2526bool Ordered, const DispatchRTInput &DispatchValues) {2527if (!CGF.HaveInsertPoint())2528return;2529OpenMPSchedType Schedule = getRuntimeSchedule(2530ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);2531assert(Ordered ||2532(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&2533Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&2534Schedule != OMP_sch_static_balanced_chunked));2535// Call __kmpc_dispatch_init(2536// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,2537// kmp_int[32|64] lower, kmp_int[32|64] upper,2538// kmp_int[32|64] stride, kmp_int[32|64] chunk);25392540// If the Chunk was not specified in the clause - use default value 1.2541llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk2542: CGF.Builder.getIntN(IVSize, 1);2543llvm::Value *Args[] = {2544emitUpdateLocation(CGF, Loc),2545getThreadID(CGF, Loc),2546CGF.Builder.getInt32(addMonoNonMonoModifier(2547CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type2548DispatchValues.LB, // Lower2549DispatchValues.UB, // Upper2550CGF.Builder.getIntN(IVSize, 1), // Stride2551Chunk // Chunk2552};2553CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),2554Args);2555}25562557void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,2558SourceLocation Loc) {2559if (!CGF.HaveInsertPoint())2560return;2561// Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);2562llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};2563CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);2564}25652566static void emitForStaticInitCall(2567CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,2568llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,2569OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,2570const CGOpenMPRuntime::StaticRTInput &Values) {2571if (!CGF.HaveInsertPoint())2572return;25732574assert(!Values.Ordered);2575assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||2576Schedule == OMP_sch_static_balanced_chunked ||2577Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||2578Schedule == OMP_dist_sch_static ||2579Schedule == OMP_dist_sch_static_chunked);25802581// Call __kmpc_for_static_init(2582// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,2583// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,2584// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,2585// kmp_int[32|64] incr, kmp_int[32|64] chunk);2586llvm::Value *Chunk = Values.Chunk;2587if (Chunk == nullptr) {2588assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||2589Schedule == OMP_dist_sch_static) &&2590"expected static non-chunked schedule");2591// 
If the Chunk was not specified in the clause - use default value 1.2592Chunk = CGF.Builder.getIntN(Values.IVSize, 1);2593} else {2594assert((Schedule == OMP_sch_static_chunked ||2595Schedule == OMP_sch_static_balanced_chunked ||2596Schedule == OMP_ord_static_chunked ||2597Schedule == OMP_dist_sch_static_chunked) &&2598"expected static chunked schedule");2599}2600llvm::Value *Args[] = {2601UpdateLocation,2602ThreadId,2603CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,2604M2)), // Schedule type2605Values.IL.emitRawPointer(CGF), // &isLastIter2606Values.LB.emitRawPointer(CGF), // &LB2607Values.UB.emitRawPointer(CGF), // &UB2608Values.ST.emitRawPointer(CGF), // &Stride2609CGF.Builder.getIntN(Values.IVSize, 1), // Incr2610Chunk // Chunk2611};2612CGF.EmitRuntimeCall(ForStaticInitFunction, Args);2613}26142615void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,2616SourceLocation Loc,2617OpenMPDirectiveKind DKind,2618const OpenMPScheduleTy &ScheduleKind,2619const StaticRTInput &Values) {2620OpenMPSchedType ScheduleNum = getRuntimeSchedule(2621ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);2622assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&2623"Expected loop-based or sections-based directive.");2624llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,2625isOpenMPLoopDirective(DKind)2626? 
OMP_IDENT_WORK_LOOP2627: OMP_IDENT_WORK_SECTIONS);2628llvm::Value *ThreadId = getThreadID(CGF, Loc);2629llvm::FunctionCallee StaticInitFunction =2630OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,2631false);2632auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);2633emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,2634ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);2635}26362637void CGOpenMPRuntime::emitDistributeStaticInit(2638CodeGenFunction &CGF, SourceLocation Loc,2639OpenMPDistScheduleClauseKind SchedKind,2640const CGOpenMPRuntime::StaticRTInput &Values) {2641OpenMPSchedType ScheduleNum =2642getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);2643llvm::Value *UpdatedLocation =2644emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);2645llvm::Value *ThreadId = getThreadID(CGF, Loc);2646llvm::FunctionCallee StaticInitFunction;2647bool isGPUDistribute =2648CGM.getLangOpts().OpenMPIsTargetDevice &&2649(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());2650StaticInitFunction = OMPBuilder.createForStaticInitFunction(2651Values.IVSize, Values.IVSigned, isGPUDistribute);26522653emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,2654ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,2655OMPC_SCHEDULE_MODIFIER_unknown, Values);2656}26572658void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,2659SourceLocation Loc,2660OpenMPDirectiveKind DKind) {2661assert((DKind == OMPD_distribute || DKind == OMPD_for ||2662DKind == OMPD_sections) &&2663"Expected distribute, for, or sections directive kind");2664if (!CGF.HaveInsertPoint())2665return;2666// Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);2667llvm::Value *Args[] = {2668emitUpdateLocation(CGF, Loc,2669isOpenMPDistributeDirective(DKind) ||2670(DKind == OMPD_target_teams_loop)2671? OMP_IDENT_WORK_DISTRIBUTE2672: isOpenMPLoopDirective(DKind)2673? 
OMP_IDENT_WORK_LOOP2674: OMP_IDENT_WORK_SECTIONS),2675getThreadID(CGF, Loc)};2676auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);2677if (isOpenMPDistributeDirective(DKind) &&2678CGM.getLangOpts().OpenMPIsTargetDevice &&2679(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))2680CGF.EmitRuntimeCall(2681OMPBuilder.getOrCreateRuntimeFunction(2682CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),2683Args);2684else2685CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(2686CGM.getModule(), OMPRTL___kmpc_for_static_fini),2687Args);2688}26892690void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,2691SourceLocation Loc,2692unsigned IVSize,2693bool IVSigned) {2694if (!CGF.HaveInsertPoint())2695return;2696// Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);2697llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};2698CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),2699Args);2700}27012702llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,2703SourceLocation Loc, unsigned IVSize,2704bool IVSigned, Address IL,2705Address LB, Address UB,2706Address ST) {2707// Call __kmpc_dispatch_next(2708// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,2709// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,2710// kmp_int[32|64] *p_stride);2711llvm::Value *Args[] = {2712emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),2713IL.emitRawPointer(CGF), // &isLastIter2714LB.emitRawPointer(CGF), // &Lower2715UB.emitRawPointer(CGF), // &Upper2716ST.emitRawPointer(CGF) // &Stride2717};2718llvm::Value *Call = CGF.EmitRuntimeCall(2719OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);2720return CGF.EmitScalarConversion(2721Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),2722CGF.getContext().BoolTy, Loc);2723}27242725void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,2726llvm::Value *NumThreads,2727SourceLocation Loc) {2728if 
(!CGF.HaveInsertPoint())2729return;2730// Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)2731llvm::Value *Args[] = {2732emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),2733CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};2734CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(2735CGM.getModule(), OMPRTL___kmpc_push_num_threads),2736Args);2737}27382739void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,2740ProcBindKind ProcBind,2741SourceLocation Loc) {2742if (!CGF.HaveInsertPoint())2743return;2744assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");2745// Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)2746llvm::Value *Args[] = {2747emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),2748llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};2749CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(2750CGM.getModule(), OMPRTL___kmpc_push_proc_bind),2751Args);2752}27532754void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,2755SourceLocation Loc, llvm::AtomicOrdering AO) {2756if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {2757OMPBuilder.createFlush(CGF.Builder);2758} else {2759if (!CGF.HaveInsertPoint())2760return;2761// Build call void __kmpc_flush(ident_t *loc)2762CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(2763CGM.getModule(), OMPRTL___kmpc_flush),2764emitUpdateLocation(CGF, Loc));2765}2766}27672768namespace {2769/// Indexes of fields for type kmp_task_t.2770enum KmpTaskTFields {2771/// List of shared variables.2772KmpTaskTShareds,2773/// Task routine.2774KmpTaskTRoutine,2775/// Partition id for the untied tasks.2776KmpTaskTPartId,2777/// Function with call of destructors for private variables.2778Data1,2779/// Task priority.2780Data2,2781/// (Taskloops only) Lower bound.2782KmpTaskTLowerBound,2783/// (Taskloops only) Upper bound.2784KmpTaskTUpperBound,2785/// (Taskloops only) Stride.2786KmpTaskTStride,2787/// 
(Taskloops only) Is last iteration flag.2788KmpTaskTLastIter,2789/// (Taskloops only) Reduction data.2790KmpTaskTReductions,2791};2792} // anonymous namespace27932794void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {2795// If we are in simd mode or there are no entries, we don't need to do2796// anything.2797if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())2798return;27992800llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =2801[this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,2802const llvm::TargetRegionEntryInfo &EntryInfo) -> void {2803SourceLocation Loc;2804if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {2805for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),2806E = CGM.getContext().getSourceManager().fileinfo_end();2807I != E; ++I) {2808if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&2809I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {2810Loc = CGM.getContext().getSourceManager().translateFileLineCol(2811I->getFirst(), EntryInfo.Line, 1);2812break;2813}2814}2815}2816switch (Kind) {2817case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {2818unsigned DiagID = CGM.getDiags().getCustomDiagID(2819DiagnosticsEngine::Error, "Offloading entry for target region in "2820"%0 is incorrect: either the "2821"address or the ID is invalid.");2822CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;2823} break;2824case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {2825unsigned DiagID = CGM.getDiags().getCustomDiagID(2826DiagnosticsEngine::Error, "Offloading entry for declare target "2827"variable %0 is incorrect: the "2828"address is invalid.");2829CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;2830} break;2831case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {2832unsigned DiagID = CGM.getDiags().getCustomDiagID(2833DiagnosticsEngine::Error,2834"Offloading entry for declare target variable is incorrect: 
the "2835"address is invalid.");2836CGM.getDiags().Report(DiagID);2837} break;2838}2839};28402841OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);2842}28432844void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {2845if (!KmpRoutineEntryPtrTy) {2846// Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.2847ASTContext &C = CGM.getContext();2848QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};2849FunctionProtoType::ExtProtoInfo EPI;2850KmpRoutineEntryPtrQTy = C.getPointerType(2851C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));2852KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);2853}2854}28552856namespace {2857struct PrivateHelpersTy {2858PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,2859const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)2860: OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),2861PrivateElemInit(PrivateElemInit) {}2862PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}2863const Expr *OriginalRef = nullptr;2864const VarDecl *Original = nullptr;2865const VarDecl *PrivateCopy = nullptr;2866const VarDecl *PrivateElemInit = nullptr;2867bool isLocalPrivate() const {2868return !OriginalRef && !PrivateCopy && !PrivateElemInit;2869}2870};2871typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;2872} // anonymous namespace28732874static bool isAllocatableDecl(const VarDecl *VD) {2875const VarDecl *CVD = VD->getCanonicalDecl();2876if (!CVD->hasAttr<OMPAllocateDeclAttr>())2877return false;2878const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();2879// Use the default allocation.2880return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&2881!AA->getAllocator());2882}28832884static RecordDecl *2885createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {2886if (!Privates.empty()) {2887ASTContext &C = CGM.getContext();2888// Build struct 
.kmp_privates_t. {2889// /* private vars */2890// };2891RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");2892RD->startDefinition();2893for (const auto &Pair : Privates) {2894const VarDecl *VD = Pair.second.Original;2895QualType Type = VD->getType().getNonReferenceType();2896// If the private variable is a local variable with lvalue ref type,2897// allocate the pointer instead of the pointee type.2898if (Pair.second.isLocalPrivate()) {2899if (VD->getType()->isLValueReferenceType())2900Type = C.getPointerType(Type);2901if (isAllocatableDecl(VD))2902Type = C.getPointerType(Type);2903}2904FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);2905if (VD->hasAttrs()) {2906for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),2907E(VD->getAttrs().end());2908I != E; ++I)2909FD->addAttr(*I);2910}2911}2912RD->completeDefinition();2913return RD;2914}2915return nullptr;2916}29172918static RecordDecl *2919createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,2920QualType KmpInt32Ty,2921QualType KmpRoutineEntryPointerQTy) {2922ASTContext &C = CGM.getContext();2923// Build struct kmp_task_t {2924// void * shareds;2925// kmp_routine_entry_t routine;2926// kmp_int32 part_id;2927// kmp_cmplrdata_t data1;2928// kmp_cmplrdata_t data2;2929// For taskloops additional fields:2930// kmp_uint64 lb;2931// kmp_uint64 ub;2932// kmp_int64 st;2933// kmp_int32 liter;2934// void * reductions;2935// };2936RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);2937UD->startDefinition();2938addFieldToRecordDecl(C, UD, KmpInt32Ty);2939addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);2940UD->completeDefinition();2941QualType KmpCmplrdataTy = C.getRecordType(UD);2942RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");2943RD->startDefinition();2944addFieldToRecordDecl(C, RD, C.VoidPtrTy);2945addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);2946addFieldToRecordDecl(C, RD, KmpInt32Ty);2947addFieldToRecordDecl(C, RD, 
KmpCmplrdataTy);2948addFieldToRecordDecl(C, RD, KmpCmplrdataTy);2949if (isOpenMPTaskLoopDirective(Kind)) {2950QualType KmpUInt64Ty =2951CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);2952QualType KmpInt64Ty =2953CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);2954addFieldToRecordDecl(C, RD, KmpUInt64Ty);2955addFieldToRecordDecl(C, RD, KmpUInt64Ty);2956addFieldToRecordDecl(C, RD, KmpInt64Ty);2957addFieldToRecordDecl(C, RD, KmpInt32Ty);2958addFieldToRecordDecl(C, RD, C.VoidPtrTy);2959}2960RD->completeDefinition();2961return RD;2962}29632964static RecordDecl *2965createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,2966ArrayRef<PrivateDataTy> Privates) {2967ASTContext &C = CGM.getContext();2968// Build struct kmp_task_t_with_privates {2969// kmp_task_t task_data;2970// .kmp_privates_t. privates;2971// };2972RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");2973RD->startDefinition();2974addFieldToRecordDecl(C, RD, KmpTaskTQTy);2975if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))2976addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));2977RD->completeDefinition();2978return RD;2979}29802981/// Emit a proxy function which accepts kmp_task_t as the second2982/// argument.2983/// \code2984/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {2985/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,2986/// For taskloops:2987/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,2988/// tt->reductions, tt->shareds);2989/// return 0;2990/// }2991/// \endcode2992static llvm::Function *2993emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,2994OpenMPDirectiveKind Kind, QualType KmpInt32Ty,2995QualType KmpTaskTWithPrivatesPtrQTy,2996QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,2997QualType SharedsPtrTy, llvm::Function *TaskFunction,2998llvm::Value *TaskPrivatesMap) {2999ASTContext &C = 
CGM.getContext();
  // The proxy takes two implicit parameters: the global thread id and a
  // restrict-qualified pointer to the task-with-privates record.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  // The entry is an internal-linkage function marked as non-recursive.
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole task-with-privates record; Base is its first field
  // (the kmp_task_t part).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the converted SharedsPtrTy.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field; pass a null pointer
  // when the record has no such field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is the task record
  // itself as a void pointer.
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, stride, last-iteration flag and
    // reduction data, each loaded from the kmp_task_t part.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the final argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the internal function that pushes destruction of every field of the
/// task's privates record whose type requires it. The function takes the same
/// (gtid, task pointer) parameters as the task entry.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD
=3130cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());3131auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());3132Base = CGF.EmitLValueForField(Base, *FI);3133for (const auto *Field :3134cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {3135if (QualType::DestructionKind DtorKind =3136Field->getType().isDestructedType()) {3137LValue FieldLValue = CGF.EmitLValueForField(Base, Field);3138CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());3139}3140}3141CGF.FinishFunction();3142return DestructorFn;3143}31443145/// Emit a privates mapping function for correct handling of private and3146/// firstprivate variables.3147/// \code3148/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>3149/// **noalias priv1,..., <tyn> **noalias privn) {3150/// *priv1 = &.privates.priv1;3151/// ...;3152/// *privn = &.privates.privn;3153/// }3154/// \endcode3155static llvm::Value *3156emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,3157const OMPTaskDataTy &Data, QualType PrivatesQTy,3158ArrayRef<PrivateDataTy> Privates) {3159ASTContext &C = CGM.getContext();3160FunctionArgList Args;3161ImplicitParamDecl TaskPrivatesArg(3162C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,3163C.getPointerType(PrivatesQTy).withConst().withRestrict(),3164ImplicitParamKind::Other);3165Args.push_back(&TaskPrivatesArg);3166llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;3167unsigned Counter = 1;3168for (const Expr *E : Data.PrivateVars) {3169Args.push_back(ImplicitParamDecl::Create(3170C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,3171C.getPointerType(C.getPointerType(E->getType()))3172.withConst()3173.withRestrict(),3174ImplicitParamKind::Other));3175const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());3176PrivateVarsPos[VD] = Counter;3177++Counter;3178}3179for (const Expr *E : Data.FirstprivateVars) {3180Args.push_back(ImplicitParamDecl::Create(3181C, /*DC=*/nullptr, Loc, 
/*Id=*/nullptr,3182C.getPointerType(C.getPointerType(E->getType()))3183.withConst()3184.withRestrict(),3185ImplicitParamKind::Other));3186const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());3187PrivateVarsPos[VD] = Counter;3188++Counter;3189}3190for (const Expr *E : Data.LastprivateVars) {3191Args.push_back(ImplicitParamDecl::Create(3192C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,3193C.getPointerType(C.getPointerType(E->getType()))3194.withConst()3195.withRestrict(),3196ImplicitParamKind::Other));3197const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());3198PrivateVarsPos[VD] = Counter;3199++Counter;3200}3201for (const VarDecl *VD : Data.PrivateLocals) {3202QualType Ty = VD->getType().getNonReferenceType();3203if (VD->getType()->isLValueReferenceType())3204Ty = C.getPointerType(Ty);3205if (isAllocatableDecl(VD))3206Ty = C.getPointerType(Ty);3207Args.push_back(ImplicitParamDecl::Create(3208C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,3209C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),3210ImplicitParamKind::Other));3211PrivateVarsPos[VD] = Counter;3212++Counter;3213}3214const auto &TaskPrivatesMapFnInfo =3215CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);3216llvm::FunctionType *TaskPrivatesMapTy =3217CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);3218std::string Name =3219CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});3220auto *TaskPrivatesMap = llvm::Function::Create(3221TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,3222&CGM.getModule());3223CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,3224TaskPrivatesMapFnInfo);3225if (CGM.getLangOpts().Optimize) {3226TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);3227TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);3228TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);3229}3230CodeGenFunction CGF(CGM);3231CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,3232TaskPrivatesMapFnInfo, Args, Loc, 
Loc);32333234// *privi = &.privates.privi;3235LValue Base = CGF.EmitLoadOfPointerLValue(3236CGF.GetAddrOfLocalVar(&TaskPrivatesArg),3237TaskPrivatesArg.getType()->castAs<PointerType>());3238const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());3239Counter = 0;3240for (const FieldDecl *Field : PrivatesQTyRD->fields()) {3241LValue FieldLVal = CGF.EmitLValueForField(Base, Field);3242const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];3243LValue RefLVal =3244CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());3245LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(3246RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());3247CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);3248++Counter;3249}3250CGF.FinishFunction();3251return TaskPrivatesMap;3252}32533254/// Emit initialization for private variables in task-based directives.3255static void emitPrivatesInit(CodeGenFunction &CGF,3256const OMPExecutableDirective &D,3257Address KmpTaskSharedsPtr, LValue TDBase,3258const RecordDecl *KmpTaskTWithPrivatesQTyRD,3259QualType SharedsTy, QualType SharedsPtrTy,3260const OMPTaskDataTy &Data,3261ArrayRef<PrivateDataTy> Privates, bool ForDup) {3262ASTContext &C = CGF.getContext();3263auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());3264LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);3265OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())3266? OMPD_taskloop3267: OMPD_task;3268const CapturedStmt &CS = *D.getCapturedStmt(Kind);3269CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);3270LValue SrcBase;3271bool IsTargetTask =3272isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||3273isOpenMPTargetExecutionDirective(D.getDirectiveKind());3274// For target-based directives skip 4 firstprivate arrays BasePointersArray,3275// PointersArray, SizesArray, and MappersArray. 
// The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  // SrcBase (the shareds record) is only set up when it will be read below:
  // for a task_dup of a non-target task with firstprivates, or for a target
  // task with a valid shareds pointer.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // From here on FI walks the fields of the privates record in lock-step
  // with the entries of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup case only non-trivial constructor initializers are
    // emitted; otherwise any initializer is.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value from the shareds record, re-wrapped with
          // the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda, or code emitted inside
          // a block: evaluate the original reference directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: map the element-init variable to the
          // source address and emit the initializer into the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init variable: the initializer does not read a source
        // value, so emit it directly.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for
taskloops.3374static bool checkInitIsRequired(CodeGenFunction &CGF,3375ArrayRef<PrivateDataTy> Privates) {3376bool InitRequired = false;3377for (const PrivateDataTy &Pair : Privates) {3378if (Pair.second.isLocalPrivate())3379continue;3380const VarDecl *VD = Pair.second.PrivateCopy;3381const Expr *Init = VD->getAnyInitializer();3382InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&3383!CGF.isTrivialInitializer(Init));3384if (InitRequired)3385break;3386}3387return InitRequired;3388}338933903391/// Emit task_dup function (for initialization of3392/// private/firstprivate/lastprivate vars and last_iter flag)3393/// \code3394/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int3395/// lastpriv) {3396/// // setup lastprivate flag3397/// task_dst->last = lastpriv;3398/// // could be constructor calls here...3399/// }3400/// \endcode3401static llvm::Value *3402emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,3403const OMPExecutableDirective &D,3404QualType KmpTaskTWithPrivatesPtrQTy,3405const RecordDecl *KmpTaskTWithPrivatesQTyRD,3406const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,3407QualType SharedsPtrTy, const OMPTaskDataTy &Data,3408ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {3409ASTContext &C = CGM.getContext();3410FunctionArgList Args;3411ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,3412KmpTaskTWithPrivatesPtrQTy,3413ImplicitParamKind::Other);3414ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,3415KmpTaskTWithPrivatesPtrQTy,3416ImplicitParamKind::Other);3417ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,3418ImplicitParamKind::Other);3419Args.push_back(&DstArg);3420Args.push_back(&SrcArg);3421Args.push_back(&LastprivArg);3422const auto &TaskDupFnInfo =3423CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);3424llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);3425std::string Name = 
CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});3426auto *TaskDup = llvm::Function::Create(3427TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());3428CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);3429TaskDup->setDoesNotRecurse();3430CodeGenFunction CGF(CGM);3431CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,3432Loc);34333434LValue TDBase = CGF.EmitLoadOfPointerLValue(3435CGF.GetAddrOfLocalVar(&DstArg),3436KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());3437// task_dst->liter = lastpriv;3438if (WithLastIter) {3439auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);3440LValue Base = CGF.EmitLValueForField(3441TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());3442LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);3443llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(3444CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);3445CGF.EmitStoreOfScalar(Lastpriv, LILVal);3446}34473448// Emit initial values for private copies (if any).3449assert(!Privates.empty());3450Address KmpTaskSharedsPtr = Address::invalid();3451if (!Data.FirstprivateVars.empty()) {3452LValue TDBase = CGF.EmitLoadOfPointerLValue(3453CGF.GetAddrOfLocalVar(&SrcArg),3454KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());3455LValue Base = CGF.EmitLValueForField(3456TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());3457KmpTaskSharedsPtr = Address(3458CGF.EmitLoadOfScalar(CGF.EmitLValueForField(3459Base, *std::next(KmpTaskTQTyRD->field_begin(),3460KmpTaskTShareds)),3461Loc),3462CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));3463}3464emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,3465SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);3466CGF.FinishFunction();3467return TaskDup;3468}34693470/// Checks if destructor function is required to be generated.3471/// \return true if cleanups are required, false otherwise.3472static 
bool3473checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,3474ArrayRef<PrivateDataTy> Privates) {3475for (const PrivateDataTy &P : Privates) {3476if (P.second.isLocalPrivate())3477continue;3478QualType Ty = P.second.Original->getType().getNonReferenceType();3479if (Ty.isDestructedType())3480return true;3481}3482return false;3483}34843485namespace {3486/// Loop generator for OpenMP iterator expression.3487class OMPIteratorGeneratorScope final3488: public CodeGenFunction::OMPPrivateScope {3489CodeGenFunction &CGF;3490const OMPIteratorExpr *E = nullptr;3491SmallVector<CodeGenFunction::JumpDest, 4> ContDests;3492SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;3493OMPIteratorGeneratorScope() = delete;3494OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;34953496public:3497OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)3498: CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {3499if (!E)3500return;3501SmallVector<llvm::Value *, 4> Uppers;3502for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {3503Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));3504const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));3505addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));3506const OMPIteratorHelperData &HelperData = E->getHelper(I);3507addPrivate(3508HelperData.CounterVD,3509CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));3510}3511Privatize();35123513for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {3514const OMPIteratorHelperData &HelperData = E->getHelper(I);3515LValue CLVal =3516CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),3517HelperData.CounterVD->getType());3518// Counter = 0;3519CGF.EmitStoreOfScalar(3520llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),3521CLVal);3522CodeGenFunction::JumpDest &ContDest =3523ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));3524CodeGenFunction::JumpDest &ExitDest 
=3525ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));3526// N = <number-of_iterations>;3527llvm::Value *N = Uppers[I];3528// cont:3529// if (Counter < N) goto body; else goto exit;3530CGF.EmitBlock(ContDest.getBlock());3531auto *CVal =3532CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());3533llvm::Value *Cmp =3534HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()3535? CGF.Builder.CreateICmpSLT(CVal, N)3536: CGF.Builder.CreateICmpULT(CVal, N);3537llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");3538CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());3539// body:3540CGF.EmitBlock(BodyBB);3541// Iteri = Begini + Counter * Stepi;3542CGF.EmitIgnoredExpr(HelperData.Update);3543}3544}3545~OMPIteratorGeneratorScope() {3546if (!E)3547return;3548for (unsigned I = E->numOfIterators(); I > 0; --I) {3549// Counter = Counter + 1;3550const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);3551CGF.EmitIgnoredExpr(HelperData.CounterUpdate);3552// goto cont;3553CGF.EmitBranchThroughCleanup(ContDests[I - 1]);3554// exit:3555CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);3556}3557}3558};3559} // namespace35603561static std::pair<llvm::Value *, llvm::Value *>3562getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {3563const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);3564llvm::Value *Addr;3565if (OASE) {3566const Expr *Base = OASE->getBase();3567Addr = CGF.EmitScalarExpr(Base);3568} else {3569Addr = CGF.EmitLValue(E).getPointer(CGF);3570}3571llvm::Value *SizeVal;3572QualType Ty = E->getType();3573if (OASE) {3574SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());3575for (const Expr *SE : OASE->getDimensions()) {3576llvm::Value *Sz = CGF.EmitScalarExpr(SE);3577Sz = CGF.EmitScalarConversion(3578Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());3579SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);3580}3581} else if (const auto *ASE 
=3582dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {3583LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);3584Address UpAddrAddress = UpAddrLVal.getAddress();3585llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(3586UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),3587/*Idx0=*/1);3588llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);3589llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);3590SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);3591} else {3592SizeVal = CGF.getTypeSize(Ty);3593}3594return std::make_pair(Addr, SizeVal);3595}35963597/// Builds kmp_depend_info, if it is not built yet, and builds flags type.3598static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {3599QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);3600if (KmpTaskAffinityInfoTy.isNull()) {3601RecordDecl *KmpAffinityInfoRD =3602C.buildImplicitRecord("kmp_task_affinity_info_t");3603KmpAffinityInfoRD->startDefinition();3604addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());3605addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());3606addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);3607KmpAffinityInfoRD->completeDefinition();3608KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);3609}3610}36113612CGOpenMPRuntime::TaskResultTy3613CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,3614const OMPExecutableDirective &D,3615llvm::Function *TaskFunction, QualType SharedsTy,3616Address Shareds, const OMPTaskDataTy &Data) {3617ASTContext &C = CGM.getContext();3618llvm::SmallVector<PrivateDataTy, 4> Privates;3619// Aggregate privates and sort them by the alignment.3620const auto *I = Data.PrivateCopies.begin();3621for (const Expr *E : Data.PrivateVars) {3622const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());3623Privates.emplace_back(3624C.getDeclAlign(VD),3625PrivateHelpersTy(E, VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),3626/*PrivateElemInit=*/nullptr));3627++I;3628}3629I = Data.FirstprivateCopies.begin();3630const auto *IElemInitRef = Data.FirstprivateInits.begin();3631for (const Expr *E : Data.FirstprivateVars) {3632const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());3633Privates.emplace_back(3634C.getDeclAlign(VD),3635PrivateHelpersTy(3636E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),3637cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));3638++I;3639++IElemInitRef;3640}3641I = Data.LastprivateCopies.begin();3642for (const Expr *E : Data.LastprivateVars) {3643const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());3644Privates.emplace_back(3645C.getDeclAlign(VD),3646PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),3647/*PrivateElemInit=*/nullptr));3648++I;3649}3650for (const VarDecl *VD : Data.PrivateLocals) {3651if (isAllocatableDecl(VD))3652Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));3653else3654Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));3655}3656llvm::stable_sort(Privates,3657[](const PrivateDataTy &L, const PrivateDataTy &R) {3658return L.first > R.first;3659});3660QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);3661// Build type kmp_routine_entry_t (if not built yet).3662emitKmpRoutineEntryT(KmpInt32Ty);3663// Build type kmp_task_t (if not built yet).3664if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {3665if (SavedKmpTaskloopTQTy.isNull()) {3666SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(3667CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));3668}3669KmpTaskTQTy = SavedKmpTaskloopTQTy;3670} else {3671assert((D.getDirectiveKind() == OMPD_task ||3672isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||3673isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&3674"Expected taskloop, task or target directive");3675if (SavedKmpTaskTQTy.isNull()) 
{3676SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(3677CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));3678}3679KmpTaskTQTy = SavedKmpTaskTQTy;3680}3681const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());3682// Build particular struct kmp_task_t for the given task.3683const RecordDecl *KmpTaskTWithPrivatesQTyRD =3684createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);3685QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);3686QualType KmpTaskTWithPrivatesPtrQTy =3687C.getPointerType(KmpTaskTWithPrivatesQTy);3688llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);3689llvm::Type *KmpTaskTWithPrivatesPtrTy =3690KmpTaskTWithPrivatesTy->getPointerTo();3691llvm::Value *KmpTaskTWithPrivatesTySize =3692CGF.getTypeSize(KmpTaskTWithPrivatesQTy);3693QualType SharedsPtrTy = C.getPointerType(SharedsTy);36943695// Emit initial values for private copies (if any).3696llvm::Value *TaskPrivatesMap = nullptr;3697llvm::Type *TaskPrivatesMapTy =3698std::next(TaskFunction->arg_begin(), 3)->getType();3699if (!Privates.empty()) {3700auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());3701TaskPrivatesMap =3702emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);3703TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(3704TaskPrivatesMap, TaskPrivatesMapTy);3705} else {3706TaskPrivatesMap = llvm::ConstantPointerNull::get(3707cast<llvm::PointerType>(TaskPrivatesMapTy));3708}3709// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,3710// kmp_task_t *tt);3711llvm::Function *TaskEntry = emitProxyTaskFunction(3712CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,3713KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,3714TaskPrivatesMap);37153716// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,3717// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t 
sizeof_shareds,3718// kmp_routine_entry_t *task_entry);3719// Task flags. Format is taken from3720// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,3721// description of kmp_tasking_flags struct.3722enum {3723TiedFlag = 0x1,3724FinalFlag = 0x2,3725DestructorsFlag = 0x8,3726PriorityFlag = 0x20,3727DetachableFlag = 0x40,3728};3729unsigned Flags = Data.Tied ? TiedFlag : 0;3730bool NeedsCleanup = false;3731if (!Privates.empty()) {3732NeedsCleanup =3733checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);3734if (NeedsCleanup)3735Flags = Flags | DestructorsFlag;3736}3737if (Data.Priority.getInt())3738Flags = Flags | PriorityFlag;3739if (D.hasClausesOfKind<OMPDetachClause>())3740Flags = Flags | DetachableFlag;3741llvm::Value *TaskFlags =3742Data.Final.getPointer()3743? CGF.Builder.CreateSelect(Data.Final.getPointer(),3744CGF.Builder.getInt32(FinalFlag),3745CGF.Builder.getInt32(/*C=*/0))3746: CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);3747TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));3748llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));3749SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),3750getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,3751SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(3752TaskEntry, KmpRoutineEntryPtrTy)};3753llvm::Value *NewTask;3754if (D.hasClausesOfKind<OMPNowaitClause>()) {3755// Check if we have any device clause associated with the directive.3756const Expr *Device = nullptr;3757if (auto *C = D.getSingleClause<OMPDeviceClause>())3758Device = C->getDevice();3759// Emit device ID if any otherwise use default value.3760llvm::Value *DeviceID;3761if (Device)3762DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),3763CGF.Int64Ty, /*isSigned=*/true);3764else3765DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);3766AllocArgs.push_back(DeviceID);3767NewTask = 
CGF.EmitRuntimeCall(3768OMPBuilder.getOrCreateRuntimeFunction(3769CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),3770AllocArgs);3771} else {3772NewTask =3773CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(3774CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),3775AllocArgs);3776}3777// Emit detach clause initialization.3778// evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,3779// task_descriptor);3780if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {3781const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();3782LValue EvtLVal = CGF.EmitLValue(Evt);37833784// Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,3785// int gtid, kmp_task_t *task);3786llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());3787llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());3788Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);3789llvm::Value *EvtVal = CGF.EmitRuntimeCall(3790OMPBuilder.getOrCreateRuntimeFunction(3791CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),3792{Loc, Tid, NewTask});3793EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),3794Evt->getExprLoc());3795CGF.EmitStoreOfScalar(EvtVal, EvtLVal);3796}3797// Process affinity clauses.3798if (D.hasClausesOfKind<OMPAffinityClause>()) {3799// Process list of affinity data.3800ASTContext &C = CGM.getContext();3801Address AffinitiesArray = Address::invalid();3802// Calculate number of elements to form the array of affinity data.3803llvm::Value *NumOfElements = nullptr;3804unsigned NumAffinities = 0;3805for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {3806if (const Expr *Modifier = C->getModifier()) {3807const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());3808for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {3809llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);3810Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);3811NumOfElements 
=3812NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;3813}3814} else {3815NumAffinities += C->varlist_size();3816}3817}3818getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);3819// Fields ids in kmp_task_affinity_info record.3820enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };38213822QualType KmpTaskAffinityInfoArrayTy;3823if (NumOfElements) {3824NumOfElements = CGF.Builder.CreateNUWAdd(3825llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);3826auto *OVE = new (C) OpaqueValueExpr(3827Loc,3828C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),3829VK_PRValue);3830CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,3831RValue::get(NumOfElements));3832KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(3833KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,3834/*IndexTypeQuals=*/0, SourceRange(Loc, Loc));3835// Properly emit variable-sized array.3836auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,3837ImplicitParamKind::Other);3838CGF.EmitVarDecl(*PD);3839AffinitiesArray = CGF.GetAddrOfLocalVar(PD);3840NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,3841/*isSigned=*/false);3842} else {3843KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(3844KmpTaskAffinityInfoTy,3845llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,3846ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);3847AffinitiesArray =3848CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");3849AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);3850NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,3851/*isSigned=*/false);3852}38533854const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();3855// Fill array by elements without iterators.3856unsigned Pos = 0;3857bool HasIterator = false;3858for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {3859if (C->getModifier()) {3860HasIterator = true;3861continue;3862}3863for 
(const Expr *E : C->varlists()) {3864llvm::Value *Addr;3865llvm::Value *Size;3866std::tie(Addr, Size) = getPointerAndSize(CGF, E);3867LValue Base =3868CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),3869KmpTaskAffinityInfoTy);3870// affs[i].base_addr = &<Affinities[i].second>;3871LValue BaseAddrLVal = CGF.EmitLValueForField(3872Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));3873CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),3874BaseAddrLVal);3875// affs[i].len = sizeof(<Affinities[i].second>);3876LValue LenLVal = CGF.EmitLValueForField(3877Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));3878CGF.EmitStoreOfScalar(Size, LenLVal);3879++Pos;3880}3881}3882LValue PosLVal;3883if (HasIterator) {3884PosLVal = CGF.MakeAddrLValue(3885CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),3886C.getSizeType());3887CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);3888}3889// Process elements with iterators.3890for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {3891const Expr *Modifier = C->getModifier();3892if (!Modifier)3893continue;3894OMPIteratorGeneratorScope IteratorScope(3895CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));3896for (const Expr *E : C->varlists()) {3897llvm::Value *Addr;3898llvm::Value *Size;3899std::tie(Addr, Size) = getPointerAndSize(CGF, E);3900llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());3901LValue Base =3902CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),3903KmpTaskAffinityInfoTy);3904// affs[i].base_addr = &<Affinities[i].second>;3905LValue BaseAddrLVal = CGF.EmitLValueForField(3906Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));3907CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),3908BaseAddrLVal);3909// affs[i].len = sizeof(<Affinities[i].second>);3910LValue LenLVal = CGF.EmitLValueForField(3911Base, *std::next(KmpAffinityInfoRD->field_begin(), 
Len));3912CGF.EmitStoreOfScalar(Size, LenLVal);3913Idx = CGF.Builder.CreateNUWAdd(3914Idx, llvm::ConstantInt::get(Idx->getType(), 1));3915CGF.EmitStoreOfScalar(Idx, PosLVal);3916}3917}3918// Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,3919// kmp_int32 gtid, kmp_task_t *new_task, kmp_int323920// naffins, kmp_task_affinity_info_t *affin_list);3921llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);3922llvm::Value *GTid = getThreadID(CGF, Loc);3923llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(3924AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);3925// FIXME: Emit the function and ignore its result for now unless the3926// runtime function is properly implemented.3927(void)CGF.EmitRuntimeCall(3928OMPBuilder.getOrCreateRuntimeFunction(3929CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),3930{LocRef, GTid, NewTask, NumOfElements, AffinListPtr});3931}3932llvm::Value *NewTaskNewTaskTTy =3933CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(3934NewTask, KmpTaskTWithPrivatesPtrTy);3935LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,3936KmpTaskTWithPrivatesQTy);3937LValue TDBase =3938CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());3939// Fill the data in the resulting kmp_task_t record.3940// Copy shareds if there are any.3941Address KmpTaskSharedsPtr = Address::invalid();3942if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {3943KmpTaskSharedsPtr = Address(3944CGF.EmitLoadOfScalar(3945CGF.EmitLValueForField(3946TDBase,3947*std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),3948Loc),3949CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));3950LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);3951LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);3952CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);3953}3954// Emit initial values for private copies (if any).3955TaskResultTy Result;3956if (!Privates.empty()) 
{3957emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,3958SharedsTy, SharedsPtrTy, Data, Privates,3959/*ForDup=*/false);3960if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&3961(!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {3962Result.TaskDupFn = emitTaskDupFunction(3963CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,3964KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,3965/*WithLastIter=*/!Data.LastprivateVars.empty());3966}3967}3968// Fields of union "kmp_cmplrdata_t" for destructors and priority.3969enum { Priority = 0, Destructors = 1 };3970// Provide pointer to function with destructors for privates.3971auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);3972const RecordDecl *KmpCmplrdataUD =3973(*FI)->getType()->getAsUnionType()->getDecl();3974if (NeedsCleanup) {3975llvm::Value *DestructorFn = emitDestructorsFunction(3976CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,3977KmpTaskTWithPrivatesQTy);3978LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);3979LValue DestructorsLV = CGF.EmitLValueForField(3980Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));3981CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(3982DestructorFn, KmpRoutineEntryPtrTy),3983DestructorsLV);3984}3985// Set priority.3986if (Data.Priority.getInt()) {3987LValue Data2LV = CGF.EmitLValueForField(3988TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));3989LValue PriorityLV = CGF.EmitLValueForField(3990Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));3991CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);3992}3993Result.NewTask = NewTask;3994Result.TaskEntry = TaskEntry;3995Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;3996Result.TDBase = TDBase;3997Result.KmpTaskTQTyRD = KmpTaskTQTyRD;3998return Result;3999}40004001/// Translates internal dependency kind into the runtime kind.4002static RTLDependenceKindTy 
translateDependencyKind(OpenMPDependClauseKind K) {4003RTLDependenceKindTy DepKind;4004switch (K) {4005case OMPC_DEPEND_in:4006DepKind = RTLDependenceKindTy::DepIn;4007break;4008// Out and InOut dependencies must use the same code.4009case OMPC_DEPEND_out:4010case OMPC_DEPEND_inout:4011DepKind = RTLDependenceKindTy::DepInOut;4012break;4013case OMPC_DEPEND_mutexinoutset:4014DepKind = RTLDependenceKindTy::DepMutexInOutSet;4015break;4016case OMPC_DEPEND_inoutset:4017DepKind = RTLDependenceKindTy::DepInOutSet;4018break;4019case OMPC_DEPEND_outallmemory:4020DepKind = RTLDependenceKindTy::DepOmpAllMem;4021break;4022case OMPC_DEPEND_source:4023case OMPC_DEPEND_sink:4024case OMPC_DEPEND_depobj:4025case OMPC_DEPEND_inoutallmemory:4026case OMPC_DEPEND_unknown:4027llvm_unreachable("Unknown task dependence type");4028}4029return DepKind;4030}40314032/// Builds kmp_depend_info, if it is not built yet, and builds flags type.4033static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,4034QualType &FlagsTy) {4035FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);4036if (KmpDependInfoTy.isNull()) {4037RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");4038KmpDependInfoRD->startDefinition();4039addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());4040addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());4041addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);4042KmpDependInfoRD->completeDefinition();4043KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);4044}4045}40464047std::pair<llvm::Value *, LValue>4048CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,4049SourceLocation Loc) {4050ASTContext &C = CGM.getContext();4051QualType FlagsTy;4052getDependTypes(C, KmpDependInfoTy, FlagsTy);4053RecordDecl *KmpDependInfoRD =4054cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());4055QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);4056LValue Base = 
CGF.EmitLoadOfPointerLValue(4057DepobjLVal.getAddress().withElementType(4058CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),4059KmpDependInfoPtrTy->castAs<PointerType>());4060Address DepObjAddr = CGF.Builder.CreateGEP(4061CGF, Base.getAddress(),4062llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));4063LValue NumDepsBase = CGF.MakeAddrLValue(4064DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());4065// NumDeps = deps[i].base_addr;4066LValue BaseAddrLVal = CGF.EmitLValueForField(4067NumDepsBase,4068*std::next(KmpDependInfoRD->field_begin(),4069static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));4070llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);4071return std::make_pair(NumDeps, Base);4072}40734074static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,4075llvm::PointerUnion<unsigned *, LValue *> Pos,4076const OMPTaskDataTy::DependData &Data,4077Address DependenciesArray) {4078CodeGenModule &CGM = CGF.CGM;4079ASTContext &C = CGM.getContext();4080QualType FlagsTy;4081getDependTypes(C, KmpDependInfoTy, FlagsTy);4082RecordDecl *KmpDependInfoRD =4083cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());4084llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);40854086OMPIteratorGeneratorScope IteratorScope(4087CGF, cast_or_null<OMPIteratorExpr>(4088Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts()4089: nullptr));4090for (const Expr *E : Data.DepExprs) {4091llvm::Value *Addr;4092llvm::Value *Size;40934094// The expression will be a nullptr in the 'omp_all_memory' case.4095if (E) {4096std::tie(Addr, Size) = getPointerAndSize(CGF, E);4097Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);4098} else {4099Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);4100Size = llvm::ConstantInt::get(CGF.SizeTy, 0);4101}4102LValue Base;4103if (unsigned *P = Pos.dyn_cast<unsigned *>()) {4104Base = CGF.MakeAddrLValue(4105CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);4106} else {4107assert(E && "Expected a non-null expression");4108LValue &PosLVal = *Pos.get<LValue *>();4109llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());4110Base = CGF.MakeAddrLValue(4111CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);4112}4113// deps[i].base_addr = &<Dependencies[i].second>;4114LValue BaseAddrLVal = CGF.EmitLValueForField(4115Base,4116*std::next(KmpDependInfoRD->field_begin(),4117static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));4118CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);4119// deps[i].len = sizeof(<Dependencies[i].second>);4120LValue LenLVal = CGF.EmitLValueForField(4121Base, *std::next(KmpDependInfoRD->field_begin(),4122static_cast<unsigned int>(RTLDependInfoFields::Len)));4123CGF.EmitStoreOfScalar(Size, LenLVal);4124// deps[i].flags = <Dependencies[i].first>;4125RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);4126LValue FlagsLVal = CGF.EmitLValueForField(4127Base,4128*std::next(KmpDependInfoRD->field_begin(),4129static_cast<unsigned int>(RTLDependInfoFields::Flags)));4130CGF.EmitStoreOfScalar(4131llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),4132FlagsLVal);4133if (unsigned *P = Pos.dyn_cast<unsigned *>()) {4134++(*P);4135} else {4136LValue &PosLVal = *Pos.get<LValue *>();4137llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, 
E->getExprLoc());4138Idx = CGF.Builder.CreateNUWAdd(Idx,4139llvm::ConstantInt::get(Idx->getType(), 1));4140CGF.EmitStoreOfScalar(Idx, PosLVal);4141}4142}4143}41444145SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(4146CodeGenFunction &CGF, QualType &KmpDependInfoTy,4147const OMPTaskDataTy::DependData &Data) {4148assert(Data.DepKind == OMPC_DEPEND_depobj &&4149"Expected depobj dependency kind.");4150SmallVector<llvm::Value *, 4> Sizes;4151SmallVector<LValue, 4> SizeLVals;4152ASTContext &C = CGF.getContext();4153{4154OMPIteratorGeneratorScope IteratorScope(4155CGF, cast_or_null<OMPIteratorExpr>(4156Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()4157: nullptr));4158for (const Expr *E : Data.DepExprs) {4159llvm::Value *NumDeps;4160LValue Base;4161LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());4162std::tie(NumDeps, Base) =4163getDepobjElements(CGF, DepobjLVal, E->getExprLoc());4164LValue NumLVal = CGF.MakeAddrLValue(4165CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),4166C.getUIntPtrType());4167CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),4168NumLVal.getAddress());4169llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());4170llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);4171CGF.EmitStoreOfScalar(Add, NumLVal);4172SizeLVals.push_back(NumLVal);4173}4174}4175for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {4176llvm::Value *Size =4177CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());4178Sizes.push_back(Size);4179}4180return Sizes;4181}41824183void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,4184QualType &KmpDependInfoTy,4185LValue PosLVal,4186const OMPTaskDataTy::DependData &Data,4187Address DependenciesArray) {4188assert(Data.DepKind == OMPC_DEPEND_depobj &&4189"Expected depobj dependency kind.");4190llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);4191{4192OMPIteratorGeneratorScope IteratorScope(4193CGF, 
cast_or_null<OMPIteratorExpr>(4194Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()4195: nullptr));4196for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {4197const Expr *E = Data.DepExprs[I];4198llvm::Value *NumDeps;4199LValue Base;4200LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());4201std::tie(NumDeps, Base) =4202getDepobjElements(CGF, DepobjLVal, E->getExprLoc());42034204// memcopy dependency data.4205llvm::Value *Size = CGF.Builder.CreateNUWMul(4206ElSize,4207CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));4208llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());4209Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);4210CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);42114212// Increase pos.4213// pos += size;4214llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);4215CGF.EmitStoreOfScalar(Add, PosLVal);4216}4217}4218}42194220std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(4221CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,4222SourceLocation Loc) {4223if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {4224return D.DepExprs.empty();4225}))4226return std::make_pair(nullptr, Address::invalid());4227// Process list of dependencies.4228ASTContext &C = CGM.getContext();4229Address DependenciesArray = Address::invalid();4230llvm::Value *NumOfElements = nullptr;4231unsigned NumDependencies = std::accumulate(4232Dependencies.begin(), Dependencies.end(), 0,4233[](unsigned V, const OMPTaskDataTy::DependData &D) {4234return D.DepKind == OMPC_DEPEND_depobj4235? V4236: (V + (D.IteratorExpr ? 
0 : D.DepExprs.size()));4237});4238QualType FlagsTy;4239getDependTypes(C, KmpDependInfoTy, FlagsTy);4240bool HasDepobjDeps = false;4241bool HasRegularWithIterators = false;4242llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);4243llvm::Value *NumOfRegularWithIterators =4244llvm::ConstantInt::get(CGF.IntPtrTy, 0);4245// Calculate number of depobj dependencies and regular deps with the4246// iterators.4247for (const OMPTaskDataTy::DependData &D : Dependencies) {4248if (D.DepKind == OMPC_DEPEND_depobj) {4249SmallVector<llvm::Value *, 4> Sizes =4250emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);4251for (llvm::Value *Size : Sizes) {4252NumOfDepobjElements =4253CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);4254}4255HasDepobjDeps = true;4256continue;4257}4258// Include number of iterations, if any.42594260if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {4261llvm::Value *ClauseIteratorSpace =4262llvm::ConstantInt::get(CGF.IntPtrTy, 1);4263for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {4264llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);4265Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);4266ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);4267}4268llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(4269ClauseIteratorSpace,4270llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));4271NumOfRegularWithIterators =4272CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);4273HasRegularWithIterators = true;4274continue;4275}4276}42774278QualType KmpDependInfoArrayTy;4279if (HasDepobjDeps || HasRegularWithIterators) {4280NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,4281/*isSigned=*/false);4282if (HasDepobjDeps) {4283NumOfElements =4284CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);4285}4286if (HasRegularWithIterators) {4287NumOfElements =4288CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, 
NumOfElements);4289}4290auto *OVE = new (C) OpaqueValueExpr(4291Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),4292VK_PRValue);4293CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,4294RValue::get(NumOfElements));4295KmpDependInfoArrayTy =4296C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,4297/*IndexTypeQuals=*/0, SourceRange(Loc, Loc));4298// CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);4299// Properly emit variable-sized array.4300auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,4301ImplicitParamKind::Other);4302CGF.EmitVarDecl(*PD);4303DependenciesArray = CGF.GetAddrOfLocalVar(PD);4304NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,4305/*isSigned=*/false);4306} else {4307KmpDependInfoArrayTy = C.getConstantArrayType(4308KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,4309ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);4310DependenciesArray =4311CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");4312DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);4313NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,4314/*isSigned=*/false);4315}4316unsigned Pos = 0;4317for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {4318if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||4319Dependencies[I].IteratorExpr)4320continue;4321emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],4322DependenciesArray);4323}4324// Copy regular dependencies with iterators.4325LValue PosLVal = CGF.MakeAddrLValue(4326CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());4327CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);4328for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {4329if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||4330!Dependencies[I].IteratorExpr)4331continue;4332emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],4333DependenciesArray);4334}4335// 
Copy final depobj arrays without iterators.4336if (HasDepobjDeps) {4337for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {4338if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)4339continue;4340emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],4341DependenciesArray);4342}4343}4344DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(4345DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);4346return std::make_pair(NumOfElements, DependenciesArray);4347}43484349Address CGOpenMPRuntime::emitDepobjDependClause(4350CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,4351SourceLocation Loc) {4352if (Dependencies.DepExprs.empty())4353return Address::invalid();4354// Process list of dependencies.4355ASTContext &C = CGM.getContext();4356Address DependenciesArray = Address::invalid();4357unsigned NumDependencies = Dependencies.DepExprs.size();4358QualType FlagsTy;4359getDependTypes(C, KmpDependInfoTy, FlagsTy);4360RecordDecl *KmpDependInfoRD =4361cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());43624363llvm::Value *Size;4364// Define type kmp_depend_info[<Dependencies.size()>];4365// For depobj reserve one extra element to store the number of elements.4366// It is required to handle depobj(x) update(in) construct.4367// kmp_depend_info[<Dependencies.size()>] deps;4368llvm::Value *NumDepsVal;4369CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);4370if (const auto *IE =4371cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {4372NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);4373for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {4374llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);4375Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);4376NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);4377}4378Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),4379NumDepsVal);4380CharUnits SizeInBytes 
=4381C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);4382llvm::Value *RecSize = CGM.getSize(SizeInBytes);4383Size = CGF.Builder.CreateNUWMul(Size, RecSize);4384NumDepsVal =4385CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);4386} else {4387QualType KmpDependInfoArrayTy = C.getConstantArrayType(4388KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),4389nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);4390CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);4391Size = CGM.getSize(Sz.alignTo(Align));4392NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);4393}4394// Need to allocate on the dynamic memory.4395llvm::Value *ThreadID = getThreadID(CGF, Loc);4396// Use default allocator.4397llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);4398llvm::Value *Args[] = {ThreadID, Size, Allocator};43994400llvm::Value *Addr =4401CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(4402CGM.getModule(), OMPRTL___kmpc_alloc),4403Args, ".dep.arr.addr");4404llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);4405Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(4406Addr, KmpDependInfoLlvmTy->getPointerTo());4407DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);4408// Write number of elements in the first element of array for depobj.4409LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);4410// deps[i].base_addr = NumDependencies;4411LValue BaseAddrLVal = CGF.EmitLValueForField(4412Base,4413*std::next(KmpDependInfoRD->field_begin(),4414static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));4415CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);4416llvm::PointerUnion<unsigned *, LValue *> Pos;4417unsigned Idx = 1;4418LValue PosLVal;4419if (Dependencies.IteratorExpr) {4420PosLVal = CGF.MakeAddrLValue(4421CGF.CreateMemTemp(C.getSizeType(), 
"iterator.counter.addr"),4422C.getSizeType());4423CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,4424/*IsInit=*/true);4425Pos = &PosLVal;4426} else {4427Pos = &Idx;4428}4429emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);4430DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(4431CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,4432CGF.Int8Ty);4433return DependenciesArray;4434}44354436void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,4437SourceLocation Loc) {4438ASTContext &C = CGM.getContext();4439QualType FlagsTy;4440getDependTypes(C, KmpDependInfoTy, FlagsTy);4441LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),4442C.VoidPtrTy.castAs<PointerType>());4443QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);4444Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(4445Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),4446CGF.ConvertTypeForMem(KmpDependInfoTy));4447llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(4448Addr.getElementType(), Addr.emitRawPointer(CGF),4449llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));4450DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,4451CGF.VoidPtrTy);4452llvm::Value *ThreadID = getThreadID(CGF, Loc);4453// Use default allocator.4454llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);4455llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};44564457// _kmpc_free(gtid, addr, nullptr);4458(void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(4459CGM.getModule(), OMPRTL___kmpc_free),4460Args);4461}44624463void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,4464OpenMPDependClauseKind NewDepKind,4465SourceLocation Loc) {4466ASTContext &C = CGM.getContext();4467QualType FlagsTy;4468getDependTypes(C, KmpDependInfoTy, FlagsTy);4469RecordDecl *KmpDependInfoRD 
=4470cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());4471llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);4472llvm::Value *NumDeps;4473LValue Base;4474std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);44754476Address Begin = Base.getAddress();4477// Cast from pointer to array type to pointer to single element.4478llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),4479Begin.emitRawPointer(CGF), NumDeps);4480// The basic structure here is a while-do loop.4481llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");4482llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");4483llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();4484CGF.EmitBlock(BodyBB);4485llvm::PHINode *ElementPHI =4486CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");4487ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);4488Begin = Begin.withPointer(ElementPHI, KnownNonNull);4489Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),4490Base.getTBAAInfo());4491// deps[i].flags = NewDepKind;4492RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);4493LValue FlagsLVal = CGF.EmitLValueForField(4494Base, *std::next(KmpDependInfoRD->field_begin(),4495static_cast<unsigned int>(RTLDependInfoFields::Flags)));4496CGF.EmitStoreOfScalar(4497llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),4498FlagsLVal);44994500// Shift the address forward by one element.4501llvm::Value *ElementNext =4502CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")4503.emitRawPointer(CGF);4504ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());4505llvm::Value *IsEmpty =4506CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");4507CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);4508// Done.4509CGF.EmitBlock(DoneBB, /*IsFinished=*/true);4510}45114512void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,4513const OMPExecutableDirective &D,4514llvm::Function 
*TaskFunction,4515QualType SharedsTy, Address Shareds,4516const Expr *IfCond,4517const OMPTaskDataTy &Data) {4518if (!CGF.HaveInsertPoint())4519return;45204521TaskResultTy Result =4522emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);4523llvm::Value *NewTask = Result.NewTask;4524llvm::Function *TaskEntry = Result.TaskEntry;4525llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;4526LValue TDBase = Result.TDBase;4527const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;4528// Process list of dependences.4529Address DependenciesArray = Address::invalid();4530llvm::Value *NumOfElements;4531std::tie(NumOfElements, DependenciesArray) =4532emitDependClause(CGF, Data.Dependences, Loc);45334534// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()4535// libcall.4536// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,4537// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,4538// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence4539// list is not empty4540llvm::Value *ThreadID = getThreadID(CGF, Loc);4541llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);4542llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };4543llvm::Value *DepTaskArgs[7];4544if (!Data.Dependences.empty()) {4545DepTaskArgs[0] = UpLoc;4546DepTaskArgs[1] = ThreadID;4547DepTaskArgs[2] = NewTask;4548DepTaskArgs[3] = NumOfElements;4549DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);4550DepTaskArgs[5] = CGF.Builder.getInt32(0);4551DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);4552}4553auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,4554&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {4555if (!Data.Tied) {4556auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);4557LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);4558CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);4559}4560if (!Data.Dependences.empty()) 
{4561CGF.EmitRuntimeCall(4562OMPBuilder.getOrCreateRuntimeFunction(4563CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),4564DepTaskArgs);4565} else {4566CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(4567CGM.getModule(), OMPRTL___kmpc_omp_task),4568TaskArgs);4569}4570// Check if parent region is untied and build return for untied task;4571if (auto *Region =4572dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))4573Region->emitUntiedSwitch(CGF);4574};45754576llvm::Value *DepWaitTaskArgs[7];4577if (!Data.Dependences.empty()) {4578DepWaitTaskArgs[0] = UpLoc;4579DepWaitTaskArgs[1] = ThreadID;4580DepWaitTaskArgs[2] = NumOfElements;4581DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);4582DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);4583DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);4584DepWaitTaskArgs[6] =4585llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);4586}4587auto &M = CGM.getModule();4588auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,4589TaskEntry, &Data, &DepWaitTaskArgs,4590Loc](CodeGenFunction &CGF, PrePostActionTy &) {4591CodeGenFunction::RunCleanupsScope LocalScope(CGF);4592// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,4593// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int324594// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info4595// is specified.4596if (!Data.Dependences.empty())4597CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(4598M, OMPRTL___kmpc_omp_taskwait_deps_51),4599DepWaitTaskArgs);4600// Call proxy_task_entry(gtid, new_task);4601auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,4602Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {4603Action.Enter(CGF);4604llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};4605CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,4606OutlinedFnArgs);4607};46084609// Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,4610// 
kmp_task_t *new_task);4611// Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,4612// kmp_task_t *new_task);4613RegionCodeGenTy RCG(CodeGen);4614CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(4615M, OMPRTL___kmpc_omp_task_begin_if0),4616TaskArgs,4617OMPBuilder.getOrCreateRuntimeFunction(4618M, OMPRTL___kmpc_omp_task_complete_if0),4619TaskArgs);4620RCG.setAction(Action);4621RCG(CGF);4622};46234624if (IfCond) {4625emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);4626} else {4627RegionCodeGenTy ThenRCG(ThenCodeGen);4628ThenRCG(CGF);4629}4630}46314632void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,4633const OMPLoopDirective &D,4634llvm::Function *TaskFunction,4635QualType SharedsTy, Address Shareds,4636const Expr *IfCond,4637const OMPTaskDataTy &Data) {4638if (!CGF.HaveInsertPoint())4639return;4640TaskResultTy Result =4641emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);4642// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()4643// libcall.4644// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int4645// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int4646// sched, kmp_uint64 grainsize, void *task_dup);4647llvm::Value *ThreadID = getThreadID(CGF, Loc);4648llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);4649llvm::Value *IfVal;4650if (IfCond) {4651IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,4652/*isSigned=*/true);4653} else {4654IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);4655}46564657LValue LBLVal = CGF.EmitLValueForField(4658Result.TDBase,4659*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));4660const auto *LBVar =4661cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());4662CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),4663/*IsInitializer=*/true);4664LValue UBLVal = 
CGF.EmitLValueForField(4665Result.TDBase,4666*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));4667const auto *UBVar =4668cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());4669CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),4670/*IsInitializer=*/true);4671LValue StLVal = CGF.EmitLValueForField(4672Result.TDBase,4673*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));4674const auto *StVar =4675cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());4676CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),4677/*IsInitializer=*/true);4678// Store reductions address.4679LValue RedLVal = CGF.EmitLValueForField(4680Result.TDBase,4681*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));4682if (Data.Reductions) {4683CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);4684} else {4685CGF.EmitNullInitialization(RedLVal.getAddress(),4686CGF.getContext().VoidPtrTy);4687}4688enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };4689llvm::Value *TaskArgs[] = {4690UpLoc,4691ThreadID,4692Result.NewTask,4693IfVal,4694LBLVal.getPointer(CGF),4695UBLVal.getPointer(CGF),4696CGF.EmitLoadOfScalar(StLVal, Loc),4697llvm::ConstantInt::getSigned(4698CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler4699llvm::ConstantInt::getSigned(4700CGF.IntTy, Data.Schedule.getPointer()4701? Data.Schedule.getInt() ? NumTasks : Grainsize4702: NoSchedule),4703Data.Schedule.getPointer()4704? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,4705/*isSigned=*/false)4706: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),4707Result.TaskDupFn ? 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(4708Result.TaskDupFn, CGF.VoidPtrTy)4709: llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};4710CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(4711CGM.getModule(), OMPRTL___kmpc_taskloop),4712TaskArgs);4713}47144715/// Emit reduction operation for each element of array (required for4716/// array sections) LHS op = RHS.4717/// \param Type Type of array.4718/// \param LHSVar Variable on the left side of the reduction operation4719/// (references element of array in original variable).4720/// \param RHSVar Variable on the right side of the reduction operation4721/// (references element of array in original variable).4722/// \param RedOpGen Generator of reduction operation with use of LHSVar and4723/// RHSVar.4724static void EmitOMPAggregateReduction(4725CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,4726const VarDecl *RHSVar,4727const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,4728const Expr *, const Expr *)> &RedOpGen,4729const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,4730const Expr *UpExpr = nullptr) {4731// Perform element-by-element initialization.4732QualType ElementTy;4733Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);4734Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);47354736// Drill down to the base element type on both arrays.4737const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();4738llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);47394740llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);4741llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);4742// Cast from pointer to array type to pointer to single element.4743llvm::Value *LHSEnd =4744CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);4745// The basic structure here is a while-do loop.4746llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");4747llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");4748llvm::Value *IsEmpty 
=4749CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");4750CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);47514752// Enter the loop body, making that address the current address.4753llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();4754CGF.EmitBlock(BodyBB);47554756CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);47574758llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(4759RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");4760RHSElementPHI->addIncoming(RHSBegin, EntryBB);4761Address RHSElementCurrent(4762RHSElementPHI, RHSAddr.getElementType(),4763RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));47644765llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(4766LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");4767LHSElementPHI->addIncoming(LHSBegin, EntryBB);4768Address LHSElementCurrent(4769LHSElementPHI, LHSAddr.getElementType(),4770LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));47714772// Emit copy.4773CodeGenFunction::OMPPrivateScope Scope(CGF);4774Scope.addPrivate(LHSVar, LHSElementCurrent);4775Scope.addPrivate(RHSVar, RHSElementCurrent);4776Scope.Privatize();4777RedOpGen(CGF, XExpr, EExpr, UpExpr);4778Scope.ForceCleanup();47794780// Shift the address forward by one element.4781llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(4782LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,4783"omp.arraycpy.dest.element");4784llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(4785RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,4786"omp.arraycpy.src.element");4787// Check whether we've reached the end.4788llvm::Value *Done =4789CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");4790CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);4791LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());4792RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());47934794// Done.4795CGF.EmitBlock(DoneBB, 
/*IsFinished=*/true);4796}47974798/// Emit reduction combiner. If the combiner is a simple expression emit it as4799/// is, otherwise consider it as combiner of UDR decl and emit it as a call of4800/// UDR combiner function.4801static void emitReductionCombiner(CodeGenFunction &CGF,4802const Expr *ReductionOp) {4803if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))4804if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))4805if (const auto *DRE =4806dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))4807if (const auto *DRD =4808dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {4809std::pair<llvm::Function *, llvm::Function *> Reduction =4810CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);4811RValue Func = RValue::get(Reduction.first);4812CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);4813CGF.EmitIgnoredExpr(ReductionOp);4814return;4815}4816CGF.EmitIgnoredExpr(ReductionOp);4817}48184819llvm::Function *CGOpenMPRuntime::emitReductionFunction(4820StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,4821ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,4822ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {4823ASTContext &C = CGM.getContext();48244825// void reduction_func(void *LHSArg, void *RHSArg);4826FunctionArgList Args;4827ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,4828ImplicitParamKind::Other);4829ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,4830ImplicitParamKind::Other);4831Args.push_back(&LHSArg);4832Args.push_back(&RHSArg);4833const auto &CGFI =4834CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);4835std::string Name = getReductionFuncName(ReducerName);4836auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),4837llvm::GlobalValue::InternalLinkage, Name,4838&CGM.getModule());4839CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, 
CGFI);4840Fn->setDoesNotRecurse();4841CodeGenFunction CGF(CGM);4842CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);48434844// Dst = (void*[n])(LHSArg);4845// Src = (void*[n])(RHSArg);4846Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(4847CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),4848ArgsElemType->getPointerTo()),4849ArgsElemType, CGF.getPointerAlign());4850Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(4851CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),4852ArgsElemType->getPointerTo()),4853ArgsElemType, CGF.getPointerAlign());48544855// ...4856// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);4857// ...4858CodeGenFunction::OMPPrivateScope Scope(CGF);4859const auto *IPriv = Privates.begin();4860unsigned Idx = 0;4861for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {4862const auto *RHSVar =4863cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());4864Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));4865const auto *LHSVar =4866cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());4867Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));4868QualType PrivTy = (*IPriv)->getType();4869if (PrivTy->isVariablyModifiedType()) {4870// Get array size and emit VLA type.4871++Idx;4872Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);4873llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);4874const VariableArrayType *VLA =4875CGF.getContext().getAsVariableArrayType(PrivTy);4876const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());4877CodeGenFunction::OpaqueValueMapping OpaqueMap(4878CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));4879CGF.EmitVariablyModifiedType(PrivTy);4880}4881}4882Scope.Privatize();4883IPriv = Privates.begin();4884const auto *ILHS = LHSExprs.begin();4885const auto *IRHS = RHSExprs.begin();4886for (const Expr *E : ReductionOps) {4887if ((*IPriv)->getType()->isArrayType()) 
{4888// Emit reduction for array section.4889const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());4890const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());4891EmitOMPAggregateReduction(4892CGF, (*IPriv)->getType(), LHSVar, RHSVar,4893[=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {4894emitReductionCombiner(CGF, E);4895});4896} else {4897// Emit reduction for array subscript or single variable.4898emitReductionCombiner(CGF, E);4899}4900++IPriv;4901++ILHS;4902++IRHS;4903}4904Scope.ForceCleanup();4905CGF.FinishFunction();4906return Fn;4907}49084909void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,4910const Expr *ReductionOp,4911const Expr *PrivateRef,4912const DeclRefExpr *LHS,4913const DeclRefExpr *RHS) {4914if (PrivateRef->getType()->isArrayType()) {4915// Emit reduction for array section.4916const auto *LHSVar = cast<VarDecl>(LHS->getDecl());4917const auto *RHSVar = cast<VarDecl>(RHS->getDecl());4918EmitOMPAggregateReduction(4919CGF, PrivateRef->getType(), LHSVar, RHSVar,4920[=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {4921emitReductionCombiner(CGF, ReductionOp);4922});4923} else {4924// Emit reduction for array subscript or single variable.4925emitReductionCombiner(CGF, ReductionOp);4926}4927}49284929void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,4930ArrayRef<const Expr *> Privates,4931ArrayRef<const Expr *> LHSExprs,4932ArrayRef<const Expr *> RHSExprs,4933ArrayRef<const Expr *> ReductionOps,4934ReductionOptionsTy Options) {4935if (!CGF.HaveInsertPoint())4936return;49374938bool WithNowait = Options.WithNowait;4939bool SimpleReduction = Options.SimpleReduction;49404941// Next code should be emitted for reduction:4942//4943// static kmp_critical_name lock = { 0 };4944//4945// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {4946// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);4947// ...4948// 
*(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],4949// *(Type<n>-1*)rhs[<n>-1]);4950// }4951//4952// ...4953// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};4954// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),4955// RedList, reduce_func, &<lock>)) {4956// case 1:4957// ...4958// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);4959// ...4960// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);4961// break;4962// case 2:4963// ...4964// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));4965// ...4966// [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]4967// break;4968// default:;4969// }4970//4971// if SimpleReduction is true, only the next code is generated:4972// ...4973// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);4974// ...49754976ASTContext &C = CGM.getContext();49774978if (SimpleReduction) {4979CodeGenFunction::RunCleanupsScope Scope(CGF);4980const auto *IPriv = Privates.begin();4981const auto *ILHS = LHSExprs.begin();4982const auto *IRHS = RHSExprs.begin();4983for (const Expr *E : ReductionOps) {4984emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),4985cast<DeclRefExpr>(*IRHS));4986++IPriv;4987++ILHS;4988++IRHS;4989}4990return;4991}49924993// 1. 
Build a list of reduction variables.4994// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};4995auto Size = RHSExprs.size();4996for (const Expr *E : Privates) {4997if (E->getType()->isVariablyModifiedType())4998// Reserve place for array size.4999++Size;5000}5001llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);5002QualType ReductionArrayTy = C.getConstantArrayType(5003C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,5004/*IndexTypeQuals=*/0);5005RawAddress ReductionList =5006CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");5007const auto *IPriv = Privates.begin();5008unsigned Idx = 0;5009for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {5010Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);5011CGF.Builder.CreateStore(5012CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(5013CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),5014Elem);5015if ((*IPriv)->getType()->isVariablyModifiedType()) {5016// Store array size.5017++Idx;5018Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);5019llvm::Value *Size = CGF.Builder.CreateIntCast(5020CGF.getVLASize(5021CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))5022.NumElts,5023CGF.SizeTy, /*isSigned=*/false);5024CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),5025Elem);5026}5027}50285029// 2. Emit reduce_func().5030llvm::Function *ReductionFn = emitReductionFunction(5031CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),5032Privates, LHSExprs, RHSExprs, ReductionOps);50335034// 3. Create static kmp_critical_name lock = { 0 };5035std::string Name = getName({"reduction"});5036llvm::Value *Lock = getCriticalRegionLock(Name);50375038// 4. 
Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),5039// RedList, reduce_func, &<lock>);5040llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);5041llvm::Value *ThreadId = getThreadID(CGF, Loc);5042llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);5043llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(5044ReductionList.getPointer(), CGF.VoidPtrTy);5045llvm::Value *Args[] = {5046IdentTLoc, // ident_t *<loc>5047ThreadId, // i32 <gtid>5048CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>5049ReductionArrayTySize, // size_type sizeof(RedList)5050RL, // void *RedList5051ReductionFn, // void (*) (void *, void *) <reduce_func>5052Lock // kmp_critical_name *&<lock>5053};5054llvm::Value *Res = CGF.EmitRuntimeCall(5055OMPBuilder.getOrCreateRuntimeFunction(5056CGM.getModule(),5057WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),5058Args);50595060// 5. Build switch(res)5061llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");5062llvm::SwitchInst *SwInst =5063CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);50645065// 6. 
Build case 1:5066// ...5067// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);5068// ...5069// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);5070// break;5071llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");5072SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);5073CGF.EmitBlock(Case1BB);50745075// Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);5076llvm::Value *EndArgs[] = {5077IdentTLoc, // ident_t *<loc>5078ThreadId, // i32 <gtid>5079Lock // kmp_critical_name *&<lock>5080};5081auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](5082CodeGenFunction &CGF, PrePostActionTy &Action) {5083CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();5084const auto *IPriv = Privates.begin();5085const auto *ILHS = LHSExprs.begin();5086const auto *IRHS = RHSExprs.begin();5087for (const Expr *E : ReductionOps) {5088RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),5089cast<DeclRefExpr>(*IRHS));5090++IPriv;5091++ILHS;5092++IRHS;5093}5094};5095RegionCodeGenTy RCG(CodeGen);5096CommonActionTy Action(5097nullptr, std::nullopt,5098OMPBuilder.getOrCreateRuntimeFunction(5099CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait5100: OMPRTL___kmpc_end_reduce),5101EndArgs);5102RCG.setAction(Action);5103RCG(CGF);51045105CGF.EmitBranch(DefaultBB);51065107// 7. 
Build case 2:5108// ...5109// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));5110// ...5111// break;5112llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");5113SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);5114CGF.EmitBlock(Case2BB);51155116auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](5117CodeGenFunction &CGF, PrePostActionTy &Action) {5118const auto *ILHS = LHSExprs.begin();5119const auto *IRHS = RHSExprs.begin();5120const auto *IPriv = Privates.begin();5121for (const Expr *E : ReductionOps) {5122const Expr *XExpr = nullptr;5123const Expr *EExpr = nullptr;5124const Expr *UpExpr = nullptr;5125BinaryOperatorKind BO = BO_Comma;5126if (const auto *BO = dyn_cast<BinaryOperator>(E)) {5127if (BO->getOpcode() == BO_Assign) {5128XExpr = BO->getLHS();5129UpExpr = BO->getRHS();5130}5131}5132// Try to emit update expression as a simple atomic.5133const Expr *RHSExpr = UpExpr;5134if (RHSExpr) {5135// Analyze RHS part of the whole expression.5136if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(5137RHSExpr->IgnoreParenImpCasts())) {5138// If this is a conditional operator, analyze its condition for5139// min/max reduction operator.5140RHSExpr = ACO->getCond();5141}5142if (const auto *BORHS =5143dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {5144EExpr = BORHS->getRHS();5145BO = BORHS->getOpcode();5146}5147}5148if (XExpr) {5149const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());5150auto &&AtomicRedGen = [BO, VD,5151Loc](CodeGenFunction &CGF, const Expr *XExpr,5152const Expr *EExpr, const Expr *UpExpr) {5153LValue X = CGF.EmitLValue(XExpr);5154RValue E;5155if (EExpr)5156E = CGF.EmitAnyExpr(EExpr);5157CGF.EmitOMPAtomicSimpleUpdateExpr(5158X, E, BO, /*IsXLHSInRHSPart=*/true,5159llvm::AtomicOrdering::Monotonic, Loc,5160[&CGF, UpExpr, VD, Loc](RValue XRValue) {5161CodeGenFunction::OMPPrivateScope PrivateScope(CGF);5162Address LHSTemp = 
CGF.CreateMemTemp(VD->getType());5163CGF.emitOMPSimpleStore(5164CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,5165VD->getType().getNonReferenceType(), Loc);5166PrivateScope.addPrivate(VD, LHSTemp);5167(void)PrivateScope.Privatize();5168return CGF.EmitAnyExpr(UpExpr);5169});5170};5171if ((*IPriv)->getType()->isArrayType()) {5172// Emit atomic reduction for array section.5173const auto *RHSVar =5174cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());5175EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,5176AtomicRedGen, XExpr, EExpr, UpExpr);5177} else {5178// Emit atomic reduction for array subscript or single variable.5179AtomicRedGen(CGF, XExpr, EExpr, UpExpr);5180}5181} else {5182// Emit as a critical region.5183auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,5184const Expr *, const Expr *) {5185CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();5186std::string Name = RT.getName({"atomic_reduction"});5187RT.emitCriticalRegion(5188CGF, Name,5189[=](CodeGenFunction &CGF, PrePostActionTy &Action) {5190Action.Enter(CGF);5191emitReductionCombiner(CGF, E);5192},5193Loc);5194};5195if ((*IPriv)->getType()->isArrayType()) {5196const auto *LHSVar =5197cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());5198const auto *RHSVar =5199cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());5200EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,5201CritRedGen);5202} else {5203CritRedGen(CGF, nullptr, nullptr, nullptr);5204}5205}5206++ILHS;5207++IRHS;5208++IPriv;5209}5210};5211RegionCodeGenTy AtomicRCG(AtomicCodeGen);5212if (!WithNowait) {5213// Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);5214llvm::Value *EndArgs[] = {5215IdentTLoc, // ident_t *<loc>5216ThreadId, // i32 <gtid>5217Lock // kmp_critical_name *&<lock>5218};5219CommonActionTy Action(nullptr, std::nullopt,5220OMPBuilder.getOrCreateRuntimeFunction(5221CGM.getModule(), OMPRTL___kmpc_end_reduce),5222EndArgs);5223AtomicRCG.setAction(Action);5224AtomicRCG(CGF);5225} 
else {5226AtomicRCG(CGF);5227}52285229CGF.EmitBranch(DefaultBB);5230CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);5231}52325233/// Generates unique name for artificial threadprivate variables.5234/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"5235static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,5236const Expr *Ref) {5237SmallString<256> Buffer;5238llvm::raw_svector_ostream Out(Buffer);5239const clang::DeclRefExpr *DE;5240const VarDecl *D = ::getBaseDecl(Ref, DE);5241if (!D)5242D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());5243D = D->getCanonicalDecl();5244std::string Name = CGM.getOpenMPRuntime().getName(5245{D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});5246Out << Prefix << Name << "_"5247<< D->getCanonicalDecl()->getBeginLoc().getRawEncoding();5248return std::string(Out.str());5249}52505251/// Emits reduction initializer function:5252/// \code5253/// void @.red_init(void* %arg, void* %orig) {5254/// %0 = bitcast void* %arg to <type>*5255/// store <type> <init>, <type>* %05256/// ret void5257/// }5258/// \endcode5259static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,5260SourceLocation Loc,5261ReductionCodeGen &RCG, unsigned N) {5262ASTContext &C = CGM.getContext();5263QualType VoidPtrTy = C.VoidPtrTy;5264VoidPtrTy.addRestrict();5265FunctionArgList Args;5266ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,5267ImplicitParamKind::Other);5268ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,5269ImplicitParamKind::Other);5270Args.emplace_back(&Param);5271Args.emplace_back(&ParamOrig);5272const auto &FnInfo =5273CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);5274llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);5275std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});5276auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,5277Name, 
&CGM.getModule());5278CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);5279Fn->setDoesNotRecurse();5280CodeGenFunction CGF(CGM);5281CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);5282QualType PrivateType = RCG.getPrivateType(N);5283Address PrivateAddr = CGF.EmitLoadOfPointer(5284CGF.GetAddrOfLocalVar(&Param).withElementType(5285CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),5286C.getPointerType(PrivateType)->castAs<PointerType>());5287llvm::Value *Size = nullptr;5288// If the size of the reduction item is non-constant, load it from global5289// threadprivate variable.5290if (RCG.getSizes(N).second) {5291Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(5292CGF, CGM.getContext().getSizeType(),5293generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));5294Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,5295CGM.getContext().getSizeType(), Loc);5296}5297RCG.emitAggregateType(CGF, N, Size);5298Address OrigAddr = Address::invalid();5299// If initializer uses initializer from declare reduction construct, emit a5300// pointer to the address of the original reduction item (reuired by reduction5301// initializer)5302if (RCG.usesReductionInitializer(N)) {5303Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);5304OrigAddr = CGF.EmitLoadOfPointer(5305SharedAddr,5306CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());5307}5308// Emit the initializer:5309// %0 = bitcast void* %arg to <type>*5310// store <type> <init>, <type>* %05311RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,5312[](CodeGenFunction &) { return false; });5313CGF.FinishFunction();5314return Fn;5315}53165317/// Emits reduction combiner function:5318/// \code5319/// void @.red_comb(void* %arg0, void* %arg1) {5320/// %lhs = bitcast void* %arg0 to <type>*5321/// %rhs = bitcast void* %arg1 to <type>*5322/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)5323/// store <type> %2, <type>* %lhs5324/// ret void5325/// 
}5326/// \endcode5327static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,5328SourceLocation Loc,5329ReductionCodeGen &RCG, unsigned N,5330const Expr *ReductionOp,5331const Expr *LHS, const Expr *RHS,5332const Expr *PrivateRef) {5333ASTContext &C = CGM.getContext();5334const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());5335const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());5336FunctionArgList Args;5337ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,5338C.VoidPtrTy, ImplicitParamKind::Other);5339ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,5340ImplicitParamKind::Other);5341Args.emplace_back(&ParamInOut);5342Args.emplace_back(&ParamIn);5343const auto &FnInfo =5344CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);5345llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);5346std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});5347auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,5348Name, &CGM.getModule());5349CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);5350Fn->setDoesNotRecurse();5351CodeGenFunction CGF(CGM);5352CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);5353llvm::Value *Size = nullptr;5354// If the size of the reduction item is non-constant, load it from global5355// threadprivate variable.5356if (RCG.getSizes(N).second) {5357Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(5358CGF, CGM.getContext().getSizeType(),5359generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));5360Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,5361CGM.getContext().getSizeType(), Loc);5362}5363RCG.emitAggregateType(CGF, N, Size);5364// Remap lhs and rhs variables to the addresses of the function arguments.5365// %lhs = bitcast void* %arg0 to <type>*5366// %rhs = bitcast void* %arg1 to <type>*5367CodeGenFunction::OMPPrivateScope 
PrivateScope(CGF);5368PrivateScope.addPrivate(5369LHSVD,5370// Pull out the pointer to the variable.5371CGF.EmitLoadOfPointer(5372CGF.GetAddrOfLocalVar(&ParamInOut)5373.withElementType(5374CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),5375C.getPointerType(LHSVD->getType())->castAs<PointerType>()));5376PrivateScope.addPrivate(5377RHSVD,5378// Pull out the pointer to the variable.5379CGF.EmitLoadOfPointer(5380CGF.GetAddrOfLocalVar(&ParamIn).withElementType(5381CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),5382C.getPointerType(RHSVD->getType())->castAs<PointerType>()));5383PrivateScope.Privatize();5384// Emit the combiner body:5385// %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)5386// store <type> %2, <type>* %lhs5387CGM.getOpenMPRuntime().emitSingleReductionCombiner(5388CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),5389cast<DeclRefExpr>(RHS));5390CGF.FinishFunction();5391return Fn;5392}53935394/// Emits reduction finalizer function:5395/// \code5396/// void @.red_fini(void* %arg) {5397/// %0 = bitcast void* %arg to <type>*5398/// <destroy>(<type>* %0)5399/// ret void5400/// }5401/// \endcode5402static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,5403SourceLocation Loc,5404ReductionCodeGen &RCG, unsigned N) {5405if (!RCG.needCleanups(N))5406return nullptr;5407ASTContext &C = CGM.getContext();5408FunctionArgList Args;5409ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,5410ImplicitParamKind::Other);5411Args.emplace_back(&Param);5412const auto &FnInfo =5413CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);5414llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);5415std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});5416auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,5417Name, &CGM.getModule());5418CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);5419Fn->setDoesNotRecurse();5420CodeGenFunction 
CGF(CGM);5421CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);5422Address PrivateAddr = CGF.EmitLoadOfPointer(5423CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());5424llvm::Value *Size = nullptr;5425// If the size of the reduction item is non-constant, load it from global5426// threadprivate variable.5427if (RCG.getSizes(N).second) {5428Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(5429CGF, CGM.getContext().getSizeType(),5430generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));5431Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,5432CGM.getContext().getSizeType(), Loc);5433}5434RCG.emitAggregateType(CGF, N, Size);5435// Emit the finalizer body:5436// <destroy>(<type>* %0)5437RCG.emitCleanups(CGF, N, PrivateAddr);5438CGF.FinishFunction(Loc);5439return Fn;5440}54415442llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(5443CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,5444ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {5445if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())5446return nullptr;54475448// Build typedef struct:5449// kmp_taskred_input {5450// void *reduce_shar; // shared reduction item5451// void *reduce_orig; // original reduction item used for initialization5452// size_t reduce_size; // size of data item5453// void *reduce_init; // data initialization routine5454// void *reduce_fini; // data finalization routine5455// void *reduce_comb; // data combiner routine5456// kmp_task_red_flags_t flags; // flags for additional info from compiler5457// } kmp_taskred_input_t;5458ASTContext &C = CGM.getContext();5459RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");5460RD->startDefinition();5461const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);5462const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);5463const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());5464const 
FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);5465const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);5466const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);5467const FieldDecl *FlagsFD = addFieldToRecordDecl(5468C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));5469RD->completeDefinition();5470QualType RDType = C.getRecordType(RD);5471unsigned Size = Data.ReductionVars.size();5472llvm::APInt ArraySize(/*numBits=*/64, Size);5473QualType ArrayRDType =5474C.getConstantArrayType(RDType, ArraySize, nullptr,5475ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);5476// kmp_task_red_input_t .rd_input.[Size];5477RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");5478ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,5479Data.ReductionCopies, Data.ReductionOps);5480for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {5481// kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];5482llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),5483llvm::ConstantInt::get(CGM.SizeTy, Cnt)};5484llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(5485TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,5486/*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,5487".rd_input.gep.");5488LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);5489// ElemLVal.reduce_shar = &Shareds[Cnt];5490LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);5491RCG.emitSharedOrigLValue(CGF, Cnt);5492llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);5493CGF.EmitStoreOfScalar(Shared, SharedLVal);5494// ElemLVal.reduce_orig = &Origs[Cnt];5495LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);5496llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);5497CGF.EmitStoreOfScalar(Orig, OrigLVal);5498RCG.emitAggregateType(CGF, Cnt);5499llvm::Value *SizeValInChars;5500llvm::Value *SizeVal;5501std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);5502// We use delayed 
creation/initialization for VLAs and array sections. It is5503// required because runtime does not provide the way to pass the sizes of5504// VLAs/array sections to initializer/combiner/finalizer functions. Instead5505// threadprivate global variables are used to store these values and use5506// them in the functions.5507bool DelayedCreation = !!SizeVal;5508SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,5509/*isSigned=*/false);5510LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);5511CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);5512// ElemLVal.reduce_init = init;5513LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);5514llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);5515CGF.EmitStoreOfScalar(InitAddr, InitLVal);5516// ElemLVal.reduce_fini = fini;5517LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);5518llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);5519llvm::Value *FiniAddr =5520Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);5521CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);5522// ElemLVal.reduce_comb = comb;5523LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);5524llvm::Value *CombAddr = emitReduceCombFunction(5525CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],5526RHSExprs[Cnt], Data.ReductionCopies[Cnt]);5527CGF.EmitStoreOfScalar(CombAddr, CombLVal);5528// ElemLVal.flags = 0;5529LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);5530if (DelayedCreation) {5531CGF.EmitStoreOfScalar(5532llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),5533FlagsLVal);5534} else5535CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());5536}5537if (Data.IsReductionWithTaskMod) {5538// Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int5539// is_ws, int num, void *data);5540llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);5541llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, 
Loc),5542CGM.IntTy, /*isSigned=*/true);5543llvm::Value *Args[] = {5544IdentTLoc, GTid,5545llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,5546/*isSigned=*/true),5547llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),5548CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(5549TaskRedInput.getPointer(), CGM.VoidPtrTy)};5550return CGF.EmitRuntimeCall(5551OMPBuilder.getOrCreateRuntimeFunction(5552CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),5553Args);5554}5555// Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);5556llvm::Value *Args[] = {5557CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,5558/*isSigned=*/true),5559llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),5560CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),5561CGM.VoidPtrTy)};5562return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(5563CGM.getModule(), OMPRTL___kmpc_taskred_init),5564Args);5565}55665567void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,5568SourceLocation Loc,5569bool IsWorksharingReduction) {5570// Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int5571// is_ws, int num, void *data);5572llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);5573llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),5574CGM.IntTy, /*isSigned=*/true);5575llvm::Value *Args[] = {IdentTLoc, GTid,5576llvm::ConstantInt::get(CGM.IntTy,5577IsWorksharingReduction ? 
1 : 0,5578/*isSigned=*/true)};5579(void)CGF.EmitRuntimeCall(5580OMPBuilder.getOrCreateRuntimeFunction(5581CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),5582Args);5583}55845585void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,5586SourceLocation Loc,5587ReductionCodeGen &RCG,5588unsigned N) {5589auto Sizes = RCG.getSizes(N);5590// Emit threadprivate global variable if the type is non-constant5591// (Sizes.second = nullptr).5592if (Sizes.second) {5593llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,5594/*isSigned=*/false);5595Address SizeAddr = getAddrOfArtificialThreadPrivate(5596CGF, CGM.getContext().getSizeType(),5597generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));5598CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);5599}5600}56015602Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,5603SourceLocation Loc,5604llvm::Value *ReductionsPtr,5605LValue SharedLVal) {5606// Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void5607// *d);5608llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),5609CGM.IntTy,5610/*isSigned=*/true),5611ReductionsPtr,5612CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(5613SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};5614return Address(5615CGF.EmitRuntimeCall(5616OMPBuilder.getOrCreateRuntimeFunction(5617CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),5618Args),5619CGF.Int8Ty, SharedLVal.getAlignment());5620}56215622void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,5623const OMPTaskDataTy &Data) {5624if (!CGF.HaveInsertPoint())5625return;56265627if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {5628// TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.5629OMPBuilder.createTaskwait(CGF.Builder);5630} else {5631llvm::Value *ThreadID = getThreadID(CGF, Loc);5632llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);5633auto &M = 
CGM.getModule();5634Address DependenciesArray = Address::invalid();5635llvm::Value *NumOfElements;5636std::tie(NumOfElements, DependenciesArray) =5637emitDependClause(CGF, Data.Dependences, Loc);5638if (!Data.Dependences.empty()) {5639llvm::Value *DepWaitTaskArgs[7];5640DepWaitTaskArgs[0] = UpLoc;5641DepWaitTaskArgs[1] = ThreadID;5642DepWaitTaskArgs[2] = NumOfElements;5643DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);5644DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);5645DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);5646DepWaitTaskArgs[6] =5647llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);56485649CodeGenFunction::RunCleanupsScope LocalScope(CGF);56505651// Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,5652// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int325653// ndeps_noalias, kmp_depend_info_t *noalias_dep_list,5654// kmp_int32 has_no_wait); if dependence info is specified.5655CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(5656M, OMPRTL___kmpc_omp_taskwait_deps_51),5657DepWaitTaskArgs);56585659} else {56605661// Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int325662// global_tid);5663llvm::Value *Args[] = {UpLoc, ThreadID};5664// Ignore return result until untied tasks are supported.5665CGF.EmitRuntimeCall(5666OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),5667Args);5668}5669}56705671if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))5672Region->emitUntiedSwitch(CGF);5673}56745675void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,5676OpenMPDirectiveKind InnerKind,5677const RegionCodeGenTy &CodeGen,5678bool HasCancel) {5679if (!CGF.HaveInsertPoint())5680return;5681InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,5682InnerKind != OMPD_critical &&5683InnerKind != OMPD_master &&5684InnerKind != OMPD_masked);5685CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);5686}56875688namespace {5689enum 
RTCancelKind {5690CancelNoreq = 0,5691CancelParallel = 1,5692CancelLoop = 2,5693CancelSections = 3,5694CancelTaskgroup = 45695};5696} // anonymous namespace56975698static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {5699RTCancelKind CancelKind = CancelNoreq;5700if (CancelRegion == OMPD_parallel)5701CancelKind = CancelParallel;5702else if (CancelRegion == OMPD_for)5703CancelKind = CancelLoop;5704else if (CancelRegion == OMPD_sections)5705CancelKind = CancelSections;5706else {5707assert(CancelRegion == OMPD_taskgroup);5708CancelKind = CancelTaskgroup;5709}5710return CancelKind;5711}57125713void CGOpenMPRuntime::emitCancellationPointCall(5714CodeGenFunction &CGF, SourceLocation Loc,5715OpenMPDirectiveKind CancelRegion) {5716if (!CGF.HaveInsertPoint())5717return;5718// Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int325719// global_tid, kmp_int32 cncl_kind);5720if (auto *OMPRegionInfo =5721dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {5722// For 'cancellation point taskgroup', the task region info may not have a5723// cancel. 
This may instead happen in another adjacent task.5724if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {5725llvm::Value *Args[] = {5726emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),5727CGF.Builder.getInt32(getCancellationKind(CancelRegion))};5728// Ignore return result until untied tasks are supported.5729llvm::Value *Result = CGF.EmitRuntimeCall(5730OMPBuilder.getOrCreateRuntimeFunction(5731CGM.getModule(), OMPRTL___kmpc_cancellationpoint),5732Args);5733// if (__kmpc_cancellationpoint()) {5734// call i32 @__kmpc_cancel_barrier( // for parallel cancellation only5735// exit from construct;5736// }5737llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");5738llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");5739llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);5740CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);5741CGF.EmitBlock(ExitBB);5742if (CancelRegion == OMPD_parallel)5743emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);5744// exit from construct;5745CodeGenFunction::JumpDest CancelDest =5746CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());5747CGF.EmitBranchThroughCleanup(CancelDest);5748CGF.EmitBlock(ContBB, /*IsFinished=*/true);5749}5750}5751}57525753void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,5754const Expr *IfCond,5755OpenMPDirectiveKind CancelRegion) {5756if (!CGF.HaveInsertPoint())5757return;5758// Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,5759// kmp_int32 cncl_kind);5760auto &M = CGM.getModule();5761if (auto *OMPRegionInfo =5762dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {5763auto &&ThenGen = [this, &M, Loc, CancelRegion,5764OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {5765CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();5766llvm::Value *Args[] = {5767RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),5768CGF.Builder.getInt32(getCancellationKind(CancelRegion))};5769// Ignore 
return result until untied tasks are supported.5770llvm::Value *Result = CGF.EmitRuntimeCall(5771OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);5772// if (__kmpc_cancel()) {5773// call i32 @__kmpc_cancel_barrier( // for parallel cancellation only5774// exit from construct;5775// }5776llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");5777llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");5778llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);5779CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);5780CGF.EmitBlock(ExitBB);5781if (CancelRegion == OMPD_parallel)5782RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);5783// exit from construct;5784CodeGenFunction::JumpDest CancelDest =5785CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());5786CGF.EmitBranchThroughCleanup(CancelDest);5787CGF.EmitBlock(ContBB, /*IsFinished=*/true);5788};5789if (IfCond) {5790emitIfClause(CGF, IfCond, ThenGen,5791[](CodeGenFunction &, PrePostActionTy &) {});5792} else {5793RegionCodeGenTy ThenRCG(ThenGen);5794ThenRCG(CGF);5795}5796}5797}57985799namespace {5800/// Cleanup action for uses_allocators support.5801class OMPUsesAllocatorsActionTy final : public PrePostActionTy {5802ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;58035804public:5805OMPUsesAllocatorsActionTy(5806ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)5807: Allocators(Allocators) {}5808void Enter(CodeGenFunction &CGF) override {5809if (!CGF.HaveInsertPoint())5810return;5811for (const auto &AllocatorData : Allocators) {5812CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(5813CGF, AllocatorData.first, AllocatorData.second);5814}5815}5816void Exit(CodeGenFunction &CGF) override {5817if (!CGF.HaveInsertPoint())5818return;5819for (const auto &AllocatorData : Allocators) {5820CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,5821AllocatorData.first);5822}5823}5824};5825} // namespace58265827void 
// (The `void` return type of this definition ends the previous physical line.)
// Marks that a target region has been emitted, collects every uses_allocators
// entry of \p D that carries allocator traits, installs an
// OMPUsesAllocatorsActionTy for them on \p CodeGen, and then delegates to
// emitTargetOutlinedFunctionHelper with the same arguments.
CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // Note: this local `D` shadows the directive parameter `D` for the rest
      // of the loop body.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Entries without traits are not recorded.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  // The action views `Allocators`; both live until the helper call returns.
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emits a call to __kmpc_init_allocator for \p Allocator using the traits
/// array \p AllocatorTraits, then stores the (converted) result into the
/// allocator variable.
///
/// \param Allocator       Expression naming the allocator variable; after
///                        stripping parens/implicit casts it is cast to a
///                        DeclRefExpr referring to a VarDecl.
/// \param AllocatorTraits Expression whose type is cast to ConstantArrayType;
///                        its element count is passed as the number of traits.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Re-type the traits address as a void* location so its raw pointer can be
  // passed to the runtime.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* result to the declared allocator type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Loads the value of \p Allocator, converts it to void*, and emits a call to
/// __kmpc_destroy_allocator with it. The call's result is discarded.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

/// Computes thread and team bounds for target directive \p D.
///
/// First queries getNumTeamsExprForTargetDirective (filling \p MinTeamsVal and
/// \p MaxTeamsVal) and getNumThreadsExprForTargetDirective in upper-bound-only
/// mode (filling \p MaxThreadsVal). Then every CUDALaunchBoundsAttr and
/// AMDGPUFlatWorkGroupSizeAttr found in ompx_attribute clauses tightens the
/// result: minimums are raised with std::max, and a positive attribute maximum
/// either replaces a non-positive current maximum or is combined with it via
/// std::min.
///
/// \p MinThreadsVal is only ever raised here, so the caller's initial value
/// matters.
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      // Defaults: minimum 1, maximum -1 (treated below as "not provided").
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue; // Other attribute kinds do not affect the bounds.

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}

/// Builds the outlined function for the target region of \p D through
/// OMPBuilder.emitTargetRegionFunction, using a callback that generates the
/// captured-statement function for the OMPD_target capture of \p D.
///
/// On return \p OutlinedFn and \p OutlinedFnID hold whatever the builder
/// produced. When \p OutlinedFn is non-null, target attributes are set on it
/// and every AMDGPUWavesPerEUAttr from ompx_attribute clauses is applied.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  // Fresh CodeGenFunction used only for generating the outlined function.
  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  // Nothing further to annotate if no function was produced.
  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
///
/// Returns true only when, in addition, the expression has no (possible) side
/// effects.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Returns the single "interesting" statement nested in \p Body, looking
/// through containers and compound statements, or nullptr when a compound
/// statement holds more than one such statement (or none).
///
/// Statements that are skipped while searching: trivial expressions (see
/// isTrivial), asm and null statements, flush/barrier/taskyield directives,
/// and declaration statements whose declarations are all either of an
/// ignorable kind or variables that have global storage or are unused.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the current candidate is itself a compound
  // statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Returns the num_teams clause expression that applies to target directive
/// \p D, or nullptr when there is none, and reports constant bounds through
/// \p MinTeamsVal / \p MaxTeamsVal:
///  - the constant value when the expression is an integer constant (both
///    outputs are left untouched for a non-constant expression);
///  - 0 when a teams construct is present without a num_teams clause;
///  - 1 when the region runs without a teams construct (target parallel/simd
///    forms, or a plain 'target' whose single nested directive is not a teams
///    directive);
///  - -1 for a plain 'target' with no single nested executable directive.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const
OMPExecutableDirective &D, int32_t &MinTeamsVal,6022int32_t &MaxTeamsVal) {60236024OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();6025assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&6026"Expected target-based executable directive.");6027switch (DirectiveKind) {6028case OMPD_target: {6029const auto *CS = D.getInnermostCapturedStmt();6030const auto *Body =6031CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);6032const Stmt *ChildStmt =6033CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);6034if (const auto *NestedDir =6035dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {6036if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {6037if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {6038const Expr *NumTeams =6039NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();6040if (NumTeams->isIntegerConstantExpr(CGF.getContext()))6041if (auto Constant =6042NumTeams->getIntegerConstantExpr(CGF.getContext()))6043MinTeamsVal = MaxTeamsVal = Constant->getExtValue();6044return NumTeams;6045}6046MinTeamsVal = MaxTeamsVal = 0;6047return nullptr;6048}6049if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||6050isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {6051MinTeamsVal = MaxTeamsVal = 1;6052return nullptr;6053}6054MinTeamsVal = MaxTeamsVal = 1;6055return nullptr;6056}6057// A value of -1 is used to check if we need to emit no teams region6058MinTeamsVal = MaxTeamsVal = -1;6059return nullptr;6060}6061case OMPD_target_teams_loop:6062case OMPD_target_teams:6063case OMPD_target_teams_distribute:6064case OMPD_target_teams_distribute_simd:6065case OMPD_target_teams_distribute_parallel_for:6066case OMPD_target_teams_distribute_parallel_for_simd: {6067if (D.hasClausesOfKind<OMPNumTeamsClause>()) {6068const Expr *NumTeams =6069D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();6070if (NumTeams->isIntegerConstantExpr(CGF.getContext()))6071if (auto Constant = 
NumTeams->getIntegerConstantExpr(CGF.getContext()))6072MinTeamsVal = MaxTeamsVal = Constant->getExtValue();6073return NumTeams;6074}6075MinTeamsVal = MaxTeamsVal = 0;6076return nullptr;6077}6078case OMPD_target_parallel:6079case OMPD_target_parallel_for:6080case OMPD_target_parallel_for_simd:6081case OMPD_target_parallel_loop:6082case OMPD_target_simd:6083MinTeamsVal = MaxTeamsVal = 1;6084return nullptr;6085case OMPD_parallel:6086case OMPD_for:6087case OMPD_parallel_for:6088case OMPD_parallel_loop:6089case OMPD_parallel_master:6090case OMPD_parallel_sections:6091case OMPD_for_simd:6092case OMPD_parallel_for_simd:6093case OMPD_cancel:6094case OMPD_cancellation_point:6095case OMPD_ordered:6096case OMPD_threadprivate:6097case OMPD_allocate:6098case OMPD_task:6099case OMPD_simd:6100case OMPD_tile:6101case OMPD_unroll:6102case OMPD_sections:6103case OMPD_section:6104case OMPD_single:6105case OMPD_master:6106case OMPD_critical:6107case OMPD_taskyield:6108case OMPD_barrier:6109case OMPD_taskwait:6110case OMPD_taskgroup:6111case OMPD_atomic:6112case OMPD_flush:6113case OMPD_depobj:6114case OMPD_scan:6115case OMPD_teams:6116case OMPD_target_data:6117case OMPD_target_exit_data:6118case OMPD_target_enter_data:6119case OMPD_distribute:6120case OMPD_distribute_simd:6121case OMPD_distribute_parallel_for:6122case OMPD_distribute_parallel_for_simd:6123case OMPD_teams_distribute:6124case OMPD_teams_distribute_simd:6125case OMPD_teams_distribute_parallel_for:6126case OMPD_teams_distribute_parallel_for_simd:6127case OMPD_target_update:6128case OMPD_declare_simd:6129case OMPD_declare_variant:6130case OMPD_begin_declare_variant:6131case OMPD_end_declare_variant:6132case OMPD_declare_target:6133case OMPD_end_declare_target:6134case OMPD_declare_reduction:6135case OMPD_declare_mapper:6136case OMPD_taskloop:6137case OMPD_taskloop_simd:6138case OMPD_master_taskloop:6139case OMPD_master_taskloop_simd:6140case OMPD_parallel_master_taskloop:6141case OMPD_parallel_master_taskloop_simd:6142case 
OMPD_requires:6143case OMPD_metadirective:6144case OMPD_unknown:6145break;6146default:6147break;6148}6149llvm_unreachable("Unexpected directive kind.");6150}61516152llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(6153CodeGenFunction &CGF, const OMPExecutableDirective &D) {6154assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&6155"Clauses associated with the teams directive expected to be emitted "6156"only for the host!");6157CGBuilderTy &Bld = CGF.Builder;6158int32_t MinNT = -1, MaxNT = -1;6159const Expr *NumTeams =6160getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);6161if (NumTeams != nullptr) {6162OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();61636164switch (DirectiveKind) {6165case OMPD_target: {6166const auto *CS = D.getInnermostCapturedStmt();6167CGOpenMPInnerExprInfo CGInfo(CGF, *CS);6168CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);6169llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,6170/*IgnoreResultAssign*/ true);6171return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,6172/*isSigned=*/true);6173}6174case OMPD_target_teams:6175case OMPD_target_teams_distribute:6176case OMPD_target_teams_distribute_simd:6177case OMPD_target_teams_distribute_parallel_for:6178case OMPD_target_teams_distribute_parallel_for_simd: {6179CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);6180llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,6181/*IgnoreResultAssign*/ true);6182return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,6183/*isSigned=*/true);6184}6185default:6186break;6187}6188}61896190assert(MinNT == MaxNT && "Num threads ranges require handling here.");6191return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);6192}61936194/// Check for a num threads constant value (stored in \p DefaultVal), or6195/// expression (stored in \p E). If the value is conditional (via an if-clause),6196/// store the condition in \p CondVal. 
/// If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is performed.
///
/// Inspects the single directive nested in \p CS (if any). For a nested
/// parallel directive the applicable if clause and the num_threads clause are
/// examined; for a nested simd directive \p UpperBound is set to 1.
/// \p UpperBoundOnly is accepted but not read by this function.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      // Pick the first if clause that is unmodified or targets 'parallel'.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Condition folds to a constant: false means a single thread.
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            // NOTE(review): *CondVal is only written when the clause has a
            // pre-init statement; without one the condition is not
            // evaluated here. Confirm this is intended.
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      // NOTE(review): any non-zero UpperBound (the -1 sentinel, but also an
      // earlier positive bound) is overwritten by the constant; std::min is
      // only reached when UpperBound is 0. Confirm this is intended.
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      // Caller did not ask for the expression: skip pre-init emission.
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}

/// Determines the num_threads expression and a constant upper bound on the
/// thread count for target directive \p D.
///
/// \param UpperBound      In/out constant bound. Set to 1 for sequential
///                        cases, to a clause's constant value when one is
///                        found, and changed from -1 to 0 once a
///                        thread-limiting clause has been seen.
/// \param UpperBoundOnly  When true, no num_threads expression is collected
///                        from nested directives (a null pointer is passed
///                        down instead).
/// \param CondVal         Optional out-parameter receiving the emitted value
///                        of an applicable, non-constant if clause.
/// \param ThreadLimitExpr Optional out-parameter receiving the thread_limit
///                        clause expression.
/// \returns the num_threads clause expression, or nullptr if none was found.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  // Folds a constant clause value into UpperBound and optionally reports the
  // clause expression through EPtr.
  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      // NOTE(review): same update rule as in getNumThreads; a non-zero
      // UpperBound is replaced rather than combined with std::min. Confirm.
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  // Sequential execution: exactly one thread.
  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        // The nested clause's pre-init declarations are only emitted when the
        // caller wants the thread_limit expression.
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // For a nested teams directive without distribute, look one level
      // deeper for the directive it contains.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested 'distribute' may itself contain the parallel directive.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined forms: the if / thread_limit / num_threads clauses are on \p D
    // itself.
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

/// Emits the i32 number-of-threads value for target directive \p D.
///
/// The value is 1 when the computed upper bound is 1; otherwise the
/// num_threads expression if there is one; otherwise the thread_limit value if
/// there is one; otherwise 0. A non-constant if-clause condition selects
/// between that value and 1, and a thread_limit value that has not already
/// been used as the result caps it via an unsigned minimum.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We have now handled the thread limit
    // expression, so clear it to skip the minimum computation below.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expression were present, take the
  // minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g.
'map' clause, 'to' clause).6511// It provides a convenient interface to obtain the information and generate6512// code for that information.6513class MappableExprsHandler {6514public:6515/// Get the offset of the OMP_MAP_MEMBER_OF field.6516static unsigned getFlagMemberOffset() {6517unsigned Offset = 0;6518for (uint64_t Remain =6519static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(6520OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);6521!(Remain & 1); Remain = Remain >> 1)6522Offset++;6523return Offset;6524}65256526/// Class that holds debugging information for a data mapping to be passed to6527/// the runtime library.6528class MappingExprInfo {6529/// The variable declaration used for the data mapping.6530const ValueDecl *MapDecl = nullptr;6531/// The original expression used in the map clause, or null if there is6532/// none.6533const Expr *MapExpr = nullptr;65346535public:6536MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)6537: MapDecl(MapDecl), MapExpr(MapExpr) {}65386539const ValueDecl *getMapDecl() const { return MapDecl; }6540const Expr *getMapExpr() const { return MapExpr; }6541};65426543using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;6544using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;6545using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;6546using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;6547using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;6548using MapNonContiguousArrayTy =6549llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;6550using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;6551using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;65526553/// This structure contains combined information generated for mappable6554/// clauses, including base pointers, pointers, sizes, map types, user-defined6555/// mappers, and non-contiguous information.6556struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {6557MapExprsArrayTy 
Exprs;6558MapValueDeclsArrayTy Mappers;6559MapValueDeclsArrayTy DevicePtrDecls;65606561/// Append arrays in \a CurInfo.6562void append(MapCombinedInfoTy &CurInfo) {6563Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());6564DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),6565CurInfo.DevicePtrDecls.end());6566Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());6567llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);6568}6569};65706571/// Map between a struct and the its lowest & highest elements which have been6572/// mapped.6573/// [ValueDecl *] --> {LE(FieldIndex, Pointer),6574/// HE(FieldIndex, Pointer)}6575struct StructRangeInfoTy {6576MapCombinedInfoTy PreliminaryMapData;6577std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {65780, Address::invalid()};6579std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {65800, Address::invalid()};6581Address Base = Address::invalid();6582Address LB = Address::invalid();6583bool IsArraySection = false;6584bool HasCompleteRecord = false;6585};65866587private:6588/// Kind that defines how a device pointer has to be returned.6589struct MapInfo {6590OMPClauseMappableExprCommon::MappableExprComponentListRef Components;6591OpenMPMapClauseKind MapType = OMPC_MAP_unknown;6592ArrayRef<OpenMPMapModifierKind> MapModifiers;6593ArrayRef<OpenMPMotionModifierKind> MotionModifiers;6594bool ReturnDevicePointer = false;6595bool IsImplicit = false;6596const ValueDecl *Mapper = nullptr;6597const Expr *VarRef = nullptr;6598bool ForDeviceAddr = false;65996600MapInfo() = default;6601MapInfo(6602OMPClauseMappableExprCommon::MappableExprComponentListRef Components,6603OpenMPMapClauseKind MapType,6604ArrayRef<OpenMPMapModifierKind> MapModifiers,6605ArrayRef<OpenMPMotionModifierKind> MotionModifiers,6606bool ReturnDevicePointer, bool IsImplicit,6607const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,6608bool ForDeviceAddr = false)6609: Components(Components), MapType(MapType), 
MapModifiers(MapModifiers),6610MotionModifiers(MotionModifiers),6611ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),6612Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}6613};66146615/// If use_device_ptr or use_device_addr is used on a decl which is a struct6616/// member and there is no map information about it, then emission of that6617/// entry is deferred until the whole struct has been processed.6618struct DeferredDevicePtrEntryTy {6619const Expr *IE = nullptr;6620const ValueDecl *VD = nullptr;6621bool ForDeviceAddr = false;66226623DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,6624bool ForDeviceAddr)6625: IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}6626};66276628/// The target directive from where the mappable clauses were extracted. It6629/// is either a executable directive or a user-defined mapper directive.6630llvm::PointerUnion<const OMPExecutableDirective *,6631const OMPDeclareMapperDecl *>6632CurDir;66336634/// Function the directive is being generated for.6635CodeGenFunction &CGF;66366637/// Set of all first private variables in the current directive.6638/// bool data is set to true if the variable is implicitly marked as6639/// firstprivate, false otherwise.6640llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;66416642/// Map between device pointer declarations and their expression components.6643/// The key value for declarations in 'this' is null.6644llvm::DenseMap<6645const ValueDecl *,6646SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>6647DevPointersMap;66486649/// Map between device addr declarations and their expression components.6650/// The key value for declarations in 'this' is null.6651llvm::DenseMap<6652const ValueDecl *,6653SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>6654HasDevAddrsMap;66556656/// Map between lambda declarations and their map type.6657llvm::DenseMap<const ValueDecl *, const OMPMapClause *> 
LambdasMap;66586659llvm::Value *getExprTypeSize(const Expr *E) const {6660QualType ExprTy = E->getType().getCanonicalType();66616662// Calculate the size for array shaping expression.6663if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {6664llvm::Value *Size =6665CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());6666for (const Expr *SE : OAE->getDimensions()) {6667llvm::Value *Sz = CGF.EmitScalarExpr(SE);6668Sz = CGF.EmitScalarConversion(Sz, SE->getType(),6669CGF.getContext().getSizeType(),6670SE->getExprLoc());6671Size = CGF.Builder.CreateNUWMul(Size, Sz);6672}6673return Size;6674}66756676// Reference types are ignored for mapping purposes.6677if (const auto *RefTy = ExprTy->getAs<ReferenceType>())6678ExprTy = RefTy->getPointeeType().getCanonicalType();66796680// Given that an array section is considered a built-in type, we need to6681// do the calculation based on the length of the section instead of relying6682// on CGF.getTypeSize(E->getType()).6683if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {6684QualType BaseTy = ArraySectionExpr::getBaseOriginalType(6685OAE->getBase()->IgnoreParenImpCasts())6686.getCanonicalType();66876688// If there is no length associated with the expression and lower bound is6689// not specified too, that means we are using the whole length of the6690// base.6691if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&6692!OAE->getLowerBound())6693return CGF.getTypeSize(BaseTy);66946695llvm::Value *ElemSize;6696if (const auto *PTy = BaseTy->getAs<PointerType>()) {6697ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());6698} else {6699const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());6700assert(ATy && "Expecting array type if not a pointer type.");6701ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());6702}67036704// If we don't have a length at this point, that is because we have an6705// array section with a single element.6706if (!OAE->getLength() && 
OAE->getColonLocFirst().isInvalid())6707return ElemSize;67086709if (const Expr *LenExpr = OAE->getLength()) {6710llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);6711LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),6712CGF.getContext().getSizeType(),6713LenExpr->getExprLoc());6714return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);6715}6716assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&6717OAE->getLowerBound() && "expected array_section[lb:].");6718// Size = sizetype - lb * elemtype;6719llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);6720llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());6721LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),6722CGF.getContext().getSizeType(),6723OAE->getLowerBound()->getExprLoc());6724LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);6725llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);6726llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);6727LengthVal = CGF.Builder.CreateSelect(6728Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));6729return LengthVal;6730}6731return CGF.getTypeSize(ExprTy);6732}67336734/// Return the corresponding bits for a given map clause modifier. Add6735/// a flag marking the map as a pointer if requested. Add a flag marking the6736/// map as the first one of a series of maps that relate to the same map6737/// expression.6738OpenMPOffloadMappingFlags getMapTypeBits(6739OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,6740ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,6741bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {6742OpenMPOffloadMappingFlags Bits =6743IsImplicit ? 
OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT6744: OpenMPOffloadMappingFlags::OMP_MAP_NONE;6745switch (MapType) {6746case OMPC_MAP_alloc:6747case OMPC_MAP_release:6748// alloc and release is the default behavior in the runtime library, i.e.6749// if we don't pass any bits alloc/release that is what the runtime is6750// going to do. Therefore, we don't need to signal anything for these two6751// type modifiers.6752break;6753case OMPC_MAP_to:6754Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;6755break;6756case OMPC_MAP_from:6757Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;6758break;6759case OMPC_MAP_tofrom:6760Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |6761OpenMPOffloadMappingFlags::OMP_MAP_FROM;6762break;6763case OMPC_MAP_delete:6764Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;6765break;6766case OMPC_MAP_unknown:6767llvm_unreachable("Unexpected map type!");6768}6769if (AddPtrFlag)6770Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;6771if (AddIsTargetParamFlag)6772Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;6773if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))6774Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;6775if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))6776Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;6777if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||6778llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))6779Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;6780if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))6781Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;6782if (IsNonContiguous)6783Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;6784return Bits;6785}67866787/// Return true if the provided expression is a final array section. 
A6788/// final array section, is one whose length can't be proved to be one.6789bool isFinalArraySectionExpression(const Expr *E) const {6790const auto *OASE = dyn_cast<ArraySectionExpr>(E);67916792// It is not an array section and therefore not a unity-size one.6793if (!OASE)6794return false;67956796// An array section with no colon always refer to a single element.6797if (OASE->getColonLocFirst().isInvalid())6798return false;67996800const Expr *Length = OASE->getLength();68016802// If we don't have a length we have to check if the array has size 16803// for this dimension. Also, we should always expect a length if the6804// base type is pointer.6805if (!Length) {6806QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(6807OASE->getBase()->IgnoreParenImpCasts())6808.getCanonicalType();6809if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))6810return ATy->getSExtSize() != 1;6811// If we don't have a constant dimension length, we have to consider6812// the current section as having any size, so it is not necessarily6813// unitary. 
If it happen to be unity size, that's user fault.6814return true;6815}68166817// Check if the length evaluates to 1.6818Expr::EvalResult Result;6819if (!Length->EvaluateAsInt(Result, CGF.getContext()))6820return true; // Can have more that size 1.68216822llvm::APSInt ConstLength = Result.Val.getInt();6823return ConstLength.getSExtValue() != 1;6824}68256826/// Generate the base pointers, section pointers, sizes, map type bits, and6827/// user-defined mappers (all included in \a CombinedInfo) for the provided6828/// map type, map or motion modifiers, and expression components.6829/// \a IsFirstComponent should be set to true if the provided set of6830/// components is the first associated with a capture.6831void generateInfoForComponentList(6832OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,6833ArrayRef<OpenMPMotionModifierKind> MotionModifiers,6834OMPClauseMappableExprCommon::MappableExprComponentListRef Components,6835MapCombinedInfoTy &CombinedInfo,6836MapCombinedInfoTy &StructBaseCombinedInfo,6837StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,6838bool IsImplicit, bool GenerateAllInfoForClauses,6839const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,6840const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,6841ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>6842OverlappedElements = std::nullopt,6843bool AreBothBasePtrAndPteeMapped = false) const {6844// The following summarizes what has to be generated for each map and the6845// types below. 
The generated information is expressed in this order:6846// base pointer, section pointer, size, flags6847// (to add to the ones that come from the map type and modifier).6848//6849// double d;6850// int i[100];6851// float *p;6852// int **a = &i;6853//6854// struct S1 {6855// int i;6856// float f[50];6857// }6858// struct S2 {6859// int i;6860// float f[50];6861// S1 s;6862// double *p;6863// struct S2 *ps;6864// int &ref;6865// }6866// S2 s;6867// S2 *ps;6868//6869// map(d)6870// &d, &d, sizeof(double), TARGET_PARAM | TO | FROM6871//6872// map(i)6873// &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM6874//6875// map(i[1:23])6876// &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM6877//6878// map(p)6879// &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM6880//6881// map(p[1:24])6882// &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ6883// in unified shared memory mode or for local pointers6884// p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM6885//6886// map((*a)[0:3])6887// &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM6888// &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM6889//6890// map(**a)6891// &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM6892// &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM6893//6894// map(s)6895// &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM6896//6897// map(s.i)6898// &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM6899//6900// map(s.s.f)6901// &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM6902//6903// map(s.p)6904// &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM6905//6906// map(to: s.p[:22])6907// &s, &(s.p), sizeof(double*), TARGET_PARAM (*)6908// &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)6909// &(s.p), &(s.p[0]), 22*sizeof(double),6910// MEMBER_OF(1) | PTR_AND_OBJ | TO (***)6911// (*) alloc space for struct members, only this is a target parameter6912// (**) map the pointer (nothing to be mapped in this example) (the compiler6913// optimizes this 
entry out, same in the examples below)6914// (***) map the pointee (map: to)6915//6916// map(to: s.ref)6917// &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)6918// &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)6919// (*) alloc space for struct members, only this is a target parameter6920// (**) map the pointer (nothing to be mapped in this example) (the compiler6921// optimizes this entry out, same in the examples below)6922// (***) map the pointee (map: to)6923//6924// map(s.ps)6925// &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM6926//6927// map(from: s.ps->s.i)6928// &s, &(s.ps), sizeof(S2*), TARGET_PARAM6929// &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)6930// &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM6931//6932// map(to: s.ps->ps)6933// &s, &(s.ps), sizeof(S2*), TARGET_PARAM6934// &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)6935// &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO6936//6937// map(s.ps->ps->ps)6938// &s, &(s.ps), sizeof(S2*), TARGET_PARAM6939// &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)6940// &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ6941// &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM6942//6943// map(to: s.ps->ps->s.f[:22])6944// &s, &(s.ps), sizeof(S2*), TARGET_PARAM6945// &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)6946// &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ6947// &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO6948//6949// map(ps)6950// &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM6951//6952// map(ps->i)6953// ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM6954//6955// map(ps->s.f)6956// ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM6957//6958// map(from: ps->p)6959// ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM6960//6961// map(to: ps->p[:22])6962// ps, &(ps->p), sizeof(double*), TARGET_PARAM6963// ps, &(ps->p), sizeof(double*), MEMBER_OF(1)6964// &(ps->p), &(ps->p[0]), 22*sizeof(double), 
MEMBER_OF(1) | PTR_AND_OBJ | TO6965//6966// map(ps->ps)6967// ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM6968//6969// map(from: ps->ps->s.i)6970// ps, &(ps->ps), sizeof(S2*), TARGET_PARAM6971// ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)6972// &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM6973//6974// map(from: ps->ps->ps)6975// ps, &(ps->ps), sizeof(S2*), TARGET_PARAM6976// ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)6977// &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM6978//6979// map(ps->ps->ps->ps)6980// ps, &(ps->ps), sizeof(S2*), TARGET_PARAM6981// ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)6982// &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ6983// &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM6984//6985// map(to: ps->ps->ps->s.f[:22])6986// ps, &(ps->ps), sizeof(S2*), TARGET_PARAM6987// ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)6988// &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ6989// &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO6990//6991// map(to: s.f[:22]) map(from: s.p[:33])6992// &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +6993// sizeof(double*) (**), TARGET_PARAM6994// &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO6995// &s, &(s.p), sizeof(double*), MEMBER_OF(1)6996// &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM6997// (*) allocate contiguous space needed to fit all mapped members even if6998// we allocate space for members not mapped (in this example,6999// s.f[22..49] and s.s are not mapped, yet we must allocate space for7000// them as well because they fall between &s.f[0] and &s.p)7001//7002// map(from: s.f[:22]) map(to: ps->p[:33])7003// &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM7004// ps, &(ps->p), sizeof(S2*), TARGET_PARAM7005// ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)7006// &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | 
TO7007// (*) the struct this entry pertains to is the 2nd element in the list of7008// arguments, hence MEMBER_OF(2)7009//7010// map(from: s.f[:22], s.s) map(to: ps->p[:33])7011// &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM7012// &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM7013// &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM7014// ps, &(ps->p), sizeof(S2*), TARGET_PARAM7015// ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)7016// &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO7017// (*) the struct this entry pertains to is the 4th element in the list7018// of arguments, hence MEMBER_OF(4)7019//7020// map(p, p[:100])7021// ===> map(p[:100])7022// &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM70237024// Track if the map information being generated is the first for a capture.7025bool IsCaptureFirstInfo = IsFirstComponentList;7026// When the variable is on a declare target link or in a to clause with7027// unified memory, a reference is needed to hold the host/device address7028// of the variable.7029bool RequiresReference = false;70307031// Scan the components from the base to the complete expression.7032auto CI = Components.rbegin();7033auto CE = Components.rend();7034auto I = CI;70357036// Track if the map information being generated is the first for a list of7037// components.7038bool IsExpressionFirstInfo = true;7039bool FirstPointerInComplexData = false;7040Address BP = Address::invalid();7041const Expr *AssocExpr = I->getAssociatedExpression();7042const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);7043const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);7044const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);70457046if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)7047return;7048if (isa<MemberExpr>(AssocExpr)) {7049// The base is the 'this' pointer. 
The content of the pointer is going7050// to be the base of the field being mapped.7051BP = CGF.LoadCXXThisAddress();7052} else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||7053(OASE &&7054isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {7055BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();7056} else if (OAShE &&7057isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {7058BP = Address(7059CGF.EmitScalarExpr(OAShE->getBase()),7060CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),7061CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));7062} else {7063// The base is the reference to the variable.7064// BP = &Var.7065BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();7066if (const auto *VD =7067dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {7068if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =7069OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {7070if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||7071((*Res == OMPDeclareTargetDeclAttr::MT_To ||7072*Res == OMPDeclareTargetDeclAttr::MT_Enter) &&7073CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {7074RequiresReference = true;7075BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);7076}7077}7078}70797080// If the variable is a pointer and is being dereferenced (i.e. is not7081// the last component), the base has to be the pointer itself, not its7082// reference. 
References are ignored for mapping purposes.7083QualType Ty =7084I->getAssociatedDeclaration()->getType().getNonReferenceType();7085if (Ty->isAnyPointerType() && std::next(I) != CE) {7086// No need to generate individual map information for the pointer, it7087// can be associated with the combined storage if shared memory mode is7088// active or the base declaration is not global variable.7089const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());7090if (!AreBothBasePtrAndPteeMapped &&7091(CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||7092!VD || VD->hasLocalStorage()))7093BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());7094else7095FirstPointerInComplexData = true;7096++I;7097}7098}70997100// Track whether a component of the list should be marked as MEMBER_OF some7101// combined entry (for partial structs). Only the first PTR_AND_OBJ entry7102// in a component list should be marked as MEMBER_OF, all subsequent entries7103// do not belong to the base struct. E.g.7104// struct S2 s;7105// s.ps->ps->ps->f[:]7106// (1) (2) (3) (4)7107// ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a7108// PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)7109// is the pointee of ps(2) which is not member of struct s, so it should not7110// be marked as such (it is still PTR_AND_OBJ).7111// The variable is initialized to false so that PTR_AND_OBJ entries which7112// are not struct members are not considered (e.g. array of pointers to7113// data).7114bool ShouldBeMemberOf = false;71157116// Variable keeping track of whether or not we have encountered a component7117// in the component list which is a member expression. Useful when we have a7118// pointer or a final array section, in which case it is the previous7119// component in the list which tells us whether we have a member expression.7120// E.g. 
X.f[:]7121// While processing the final array section "[:]" it is "f" which tells us7122// whether we are dealing with a member of a declared struct.7123const MemberExpr *EncounteredME = nullptr;71247125// Track for the total number of dimension. Start from one for the dummy7126// dimension.7127uint64_t DimSize = 1;71287129bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;7130bool IsPrevMemberReference = false;71317132// We need to check if we will be encountering any MEs. If we do not7133// encounter any ME expression it means we will be mapping the whole struct.7134// In that case we need to skip adding an entry for the struct to the7135// CombinedInfo list and instead add an entry to the StructBaseCombinedInfo7136// list only when generating all info for clauses.7137bool IsMappingWholeStruct = true;7138if (!GenerateAllInfoForClauses) {7139IsMappingWholeStruct = false;7140} else {7141for (auto TempI = I; TempI != CE; ++TempI) {7142const MemberExpr *PossibleME =7143dyn_cast<MemberExpr>(TempI->getAssociatedExpression());7144if (PossibleME) {7145IsMappingWholeStruct = false;7146break;7147}7148}7149}71507151for (; I != CE; ++I) {7152// If the current component is member of a struct (parent struct) mark it.7153if (!EncounteredME) {7154EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());7155// If we encounter a PTR_AND_OBJ entry from now on it should be marked7156// as MEMBER_OF the parent struct.7157if (EncounteredME) {7158ShouldBeMemberOf = true;7159// Do not emit as complex pointer if this is actually not array-like7160// expression.7161if (FirstPointerInComplexData) {7162QualType Ty = std::prev(I)7163->getAssociatedDeclaration()7164->getType()7165.getNonReferenceType();7166BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());7167FirstPointerInComplexData = false;7168}7169}7170}71717172auto Next = std::next(I);71737174// We need to generate the addresses and sizes if this is the last7175// component, if the component is a pointer 
or if it is an array section7176// whose length can't be proved to be one. If this is a pointer, it7177// becomes the base address for the following components.71787179// A final array section, is one whose length can't be proved to be one.7180// If the map item is non-contiguous then we don't treat any array section7181// as final array section.7182bool IsFinalArraySection =7183!IsNonContiguous &&7184isFinalArraySectionExpression(I->getAssociatedExpression());71857186// If we have a declaration for the mapping use that, otherwise use7187// the base declaration of the map clause.7188const ValueDecl *MapDecl = (I->getAssociatedDeclaration())7189? I->getAssociatedDeclaration()7190: BaseDecl;7191MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()7192: MapExpr;71937194// Get information on whether the element is a pointer. Have to do a7195// special treatment for array sections given that they are built-in7196// types.7197const auto *OASE =7198dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());7199const auto *OAShE =7200dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());7201const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());7202const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());7203bool IsPointer =7204OAShE ||7205(OASE && ArraySectionExpr::getBaseOriginalType(OASE)7206.getCanonicalType()7207->isAnyPointerType()) ||7208I->getAssociatedExpression()->getType()->isAnyPointerType();7209bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&7210MapDecl &&7211MapDecl->getType()->isLValueReferenceType();7212bool IsNonDerefPointer = IsPointer &&7213!(UO && UO->getOpcode() != UO_Deref) && !BO &&7214!IsNonContiguous;72157216if (OASE)7217++DimSize;72187219if (Next == CE || IsMemberReference || IsNonDerefPointer ||7220IsFinalArraySection) {7221// If this is not the last component, we expect the pointer to be7222// associated with an array expression or member expression.7223assert((Next == 
CE ||7224isa<MemberExpr>(Next->getAssociatedExpression()) ||7225isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||7226isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||7227isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||7228isa<UnaryOperator>(Next->getAssociatedExpression()) ||7229isa<BinaryOperator>(Next->getAssociatedExpression())) &&7230"Unexpected expression");72317232Address LB = Address::invalid();7233Address LowestElem = Address::invalid();7234auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,7235const MemberExpr *E) {7236const Expr *BaseExpr = E->getBase();7237// If this is s.x, emit s as an lvalue. If it is s->x, emit s as a7238// scalar.7239LValue BaseLV;7240if (E->isArrow()) {7241LValueBaseInfo BaseInfo;7242TBAAAccessInfo TBAAInfo;7243Address Addr =7244CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);7245QualType PtrTy = BaseExpr->getType()->getPointeeType();7246BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);7247} else {7248BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);7249}7250return BaseLV;7251};7252if (OAShE) {7253LowestElem = LB =7254Address(CGF.EmitScalarExpr(OAShE->getBase()),7255CGF.ConvertTypeForMem(7256OAShE->getBase()->getType()->getPointeeType()),7257CGF.getContext().getTypeAlignInChars(7258OAShE->getBase()->getType()));7259} else if (IsMemberReference) {7260const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());7261LValue BaseLVal = EmitMemberExprBase(CGF, ME);7262LowestElem = CGF.EmitLValueForFieldInitialization(7263BaseLVal, cast<FieldDecl>(MapDecl))7264.getAddress();7265LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())7266.getAddress();7267} else {7268LowestElem = LB =7269CGF.EmitOMPSharedLValue(I->getAssociatedExpression())7270.getAddress();7271}72727273// If this component is a pointer inside the base struct then we don't7274// need to create any entry for it - it will be combined with the object7275// it is pointing to into a single PTR_AND_OBJ entry.7276bool 
IsMemberPointerOrAddr =7277EncounteredME &&7278(((IsPointer || ForDeviceAddr) &&7279I->getAssociatedExpression() == EncounteredME) ||7280(IsPrevMemberReference && !IsPointer) ||7281(IsMemberReference && Next != CE &&7282!Next->getAssociatedExpression()->getType()->isPointerType()));7283if (!OverlappedElements.empty() && Next == CE) {7284// Handle base element with the info for overlapped elements.7285assert(!PartialStruct.Base.isValid() && "The base element is set.");7286assert(!IsPointer &&7287"Unexpected base element with the pointer type.");7288// Mark the whole struct as the struct that requires allocation on the7289// device.7290PartialStruct.LowestElem = {0, LowestElem};7291CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(7292I->getAssociatedExpression()->getType());7293Address HB = CGF.Builder.CreateConstGEP(7294CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(7295LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),7296TypeSize.getQuantity() - 1);7297PartialStruct.HighestElem = {7298std::numeric_limits<decltype(7299PartialStruct.HighestElem.first)>::max(),7300HB};7301PartialStruct.Base = BP;7302PartialStruct.LB = LB;7303assert(7304PartialStruct.PreliminaryMapData.BasePointers.empty() &&7305"Overlapped elements must be used only once for the variable.");7306std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);7307// Emit data for non-overlapped data.7308OpenMPOffloadMappingFlags Flags =7309OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |7310getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,7311/*AddPtrFlag=*/false,7312/*AddIsTargetParamFlag=*/false, IsNonContiguous);7313llvm::Value *Size = nullptr;7314// Do bitcopy of all non-overlapped structure elements.7315for (OMPClauseMappableExprCommon::MappableExprComponentListRef7316Component : OverlappedElements) {7317Address ComponentLB = Address::invalid();7318for (const OMPClauseMappableExprCommon::MappableComponent &MC :7319Component) {7320if (const ValueDecl *VD = MC.getAssociatedDeclaration()) 
{7321const auto *FD = dyn_cast<FieldDecl>(VD);7322if (FD && FD->getType()->isLValueReferenceType()) {7323const auto *ME =7324cast<MemberExpr>(MC.getAssociatedExpression());7325LValue BaseLVal = EmitMemberExprBase(CGF, ME);7326ComponentLB =7327CGF.EmitLValueForFieldInitialization(BaseLVal, FD)7328.getAddress();7329} else {7330ComponentLB =7331CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())7332.getAddress();7333}7334llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);7335llvm::Value *LBPtr = LB.emitRawPointer(CGF);7336Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,7337LBPtr);7338break;7339}7340}7341assert(Size && "Failed to determine structure size");7342CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);7343CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));7344CombinedInfo.DevicePtrDecls.push_back(nullptr);7345CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);7346CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));7347CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(7348Size, CGF.Int64Ty, /*isSigned=*/true));7349CombinedInfo.Types.push_back(Flags);7350CombinedInfo.Mappers.push_back(nullptr);7351CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize7352: 1);7353LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);7354}7355CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);7356CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));7357CombinedInfo.DevicePtrDecls.push_back(nullptr);7358CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);7359CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));7360llvm::Value *LBPtr = LB.emitRawPointer(CGF);7361Size = CGF.Builder.CreatePtrDiff(7362CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),7363LBPtr);7364CombinedInfo.Sizes.push_back(7365CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));7366CombinedInfo.Types.push_back(Flags);7367CombinedInfo.Mappers.push_back(nullptr);7368CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize7369: 1);7370break;7371}7372llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());7373// Skip adding an entry in the CurInfo of this combined entry if the7374// whole struct is currently being mapped. The struct needs to be added7375// in the first position before any data internal to the struct is being7376// mapped.7377if (!IsMemberPointerOrAddr ||7378(Next == CE && MapType != OMPC_MAP_unknown)) {7379if (!IsMappingWholeStruct) {7380CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);7381CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));7382CombinedInfo.DevicePtrDecls.push_back(nullptr);7383CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);7384CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));7385CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(7386Size, CGF.Int64Ty, /*isSigned=*/true));7387CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize7388: 1);7389} else {7390StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);7391StructBaseCombinedInfo.BasePointers.push_back(7392BP.emitRawPointer(CGF));7393StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);7394StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);7395StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));7396StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(7397Size, CGF.Int64Ty, /*isSigned=*/true));7398StructBaseCombinedInfo.NonContigInfo.Dims.push_back(7399IsNonContiguous ? DimSize : 1);7400}74017402// If Mapper is valid, the last component inherits the mapper.7403bool HasMapper = Mapper && Next == CE;7404if (!IsMappingWholeStruct)7405CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);7406else7407StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper7408: nullptr);74097410// We need to add a pointer flag for each map that comes from the7411// same expression except for the first one. We also need to signal7412// this map is the first one that relates with the current capture7413// (there is a set of entries for each capture).7414OpenMPOffloadMappingFlags Flags =7415getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,7416!IsExpressionFirstInfo || RequiresReference ||7417FirstPointerInComplexData || IsMemberReference,7418AreBothBasePtrAndPteeMapped ||7419(IsCaptureFirstInfo && !RequiresReference),7420IsNonContiguous);74217422if (!IsExpressionFirstInfo || IsMemberReference) {7423// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,7424// then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.7425if (IsPointer || (IsMemberReference && Next != CE))7426Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |7427OpenMPOffloadMappingFlags::OMP_MAP_FROM |7428OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |7429OpenMPOffloadMappingFlags::OMP_MAP_DELETE |7430OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);74317432if (ShouldBeMemberOf) {7433// Set placeholder value 
MEMBER_OF=FFFF to indicate that the flag7434// should be later updated with the correct value of MEMBER_OF.7435Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;7436// From now on, all subsequent PTR_AND_OBJ entries should not be7437// marked as MEMBER_OF.7438ShouldBeMemberOf = false;7439}7440}74417442if (!IsMappingWholeStruct)7443CombinedInfo.Types.push_back(Flags);7444else7445StructBaseCombinedInfo.Types.push_back(Flags);7446}74477448// If we have encountered a member expression so far, keep track of the7449// mapped member. If the parent is "*this", then the value declaration7450// is nullptr.7451if (EncounteredME) {7452const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());7453unsigned FieldIndex = FD->getFieldIndex();74547455// Update info about the lowest and highest elements for this struct7456if (!PartialStruct.Base.isValid()) {7457PartialStruct.LowestElem = {FieldIndex, LowestElem};7458if (IsFinalArraySection) {7459Address HB =7460CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)7461.getAddress();7462PartialStruct.HighestElem = {FieldIndex, HB};7463} else {7464PartialStruct.HighestElem = {FieldIndex, LowestElem};7465}7466PartialStruct.Base = BP;7467PartialStruct.LB = BP;7468} else if (FieldIndex < PartialStruct.LowestElem.first) {7469PartialStruct.LowestElem = {FieldIndex, LowestElem};7470} else if (FieldIndex > PartialStruct.HighestElem.first) {7471if (IsFinalArraySection) {7472Address HB =7473CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)7474.getAddress();7475PartialStruct.HighestElem = {FieldIndex, HB};7476} else {7477PartialStruct.HighestElem = {FieldIndex, LowestElem};7478}7479}7480}74817482// Need to emit combined struct for array sections.7483if (IsFinalArraySection || IsNonContiguous)7484PartialStruct.IsArraySection = true;74857486// If we have a final array section, we are done with this expression.7487if (IsFinalArraySection)7488break;74897490// The pointer becomes the base for the next element.7491if (Next != CE)7492BP 
= IsMemberReference ? LowestElem : LB;74937494IsExpressionFirstInfo = false;7495IsCaptureFirstInfo = false;7496FirstPointerInComplexData = false;7497IsPrevMemberReference = IsMemberReference;7498} else if (FirstPointerInComplexData) {7499QualType Ty = Components.rbegin()7500->getAssociatedDeclaration()7501->getType()7502.getNonReferenceType();7503BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());7504FirstPointerInComplexData = false;7505}7506}7507// If ran into the whole component - allocate the space for the whole7508// record.7509if (!EncounteredME)7510PartialStruct.HasCompleteRecord = true;75117512if (!IsNonContiguous)7513return;75147515const ASTContext &Context = CGF.getContext();75167517// For supporting stride in array section, we need to initialize the first7518// dimension size as 1, first offset as 0, and first count as 17519MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};7520MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};7521MapValuesArrayTy CurStrides;7522MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};7523uint64_t ElementTypeSize;75247525// Collect Size information for each dimension and get the element size as7526// the first Stride. 
For example, for `int arr[10][10]`, the DimSizes7527// should be [10, 10] and the first stride is 4 btyes.7528for (const OMPClauseMappableExprCommon::MappableComponent &Component :7529Components) {7530const Expr *AssocExpr = Component.getAssociatedExpression();7531const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);75327533if (!OASE)7534continue;75357536QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());7537auto *CAT = Context.getAsConstantArrayType(Ty);7538auto *VAT = Context.getAsVariableArrayType(Ty);75397540// We need all the dimension size except for the last dimension.7541assert((VAT || CAT || &Component == &*Components.begin()) &&7542"Should be either ConstantArray or VariableArray if not the "7543"first Component");75447545// Get element size if CurStrides is empty.7546if (CurStrides.empty()) {7547const Type *ElementType = nullptr;7548if (CAT)7549ElementType = CAT->getElementType().getTypePtr();7550else if (VAT)7551ElementType = VAT->getElementType().getTypePtr();7552else7553assert(&Component == &*Components.begin() &&7554"Only expect pointer (non CAT or VAT) when this is the "7555"first Component");7556// If ElementType is null, then it means the base is a pointer7557// (neither CAT nor VAT) and we'll attempt to get ElementType again7558// for next iteration.7559if (ElementType) {7560// For the case that having pointer as base, we need to remove one7561// level of indirection.7562if (&Component != &*Components.begin())7563ElementType = ElementType->getPointeeOrArrayElementType();7564ElementTypeSize =7565Context.getTypeSizeInChars(ElementType).getQuantity();7566CurStrides.push_back(7567llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));7568}7569}7570// Get dimension value except for the last dimension since we don't need7571// it.7572if (DimSizes.size() < Components.size() - 1) {7573if (CAT)7574DimSizes.push_back(7575llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));7576else if 
(VAT)7577DimSizes.push_back(CGF.Builder.CreateIntCast(7578CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,7579/*IsSigned=*/false));7580}7581}75827583// Skip the dummy dimension since we have already have its information.7584auto *DI = DimSizes.begin() + 1;7585// Product of dimension.7586llvm::Value *DimProd =7587llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);75887589// Collect info for non-contiguous. Notice that offset, count, and stride7590// are only meaningful for array-section, so we insert a null for anything7591// other than array-section.7592// Also, the size of offset, count, and stride are not the same as7593// pointers, base_pointers, sizes, or dims. Instead, the size of offset,7594// count, and stride are the same as the number of non-contiguous7595// declaration in target update to/from clause.7596for (const OMPClauseMappableExprCommon::MappableComponent &Component :7597Components) {7598const Expr *AssocExpr = Component.getAssociatedExpression();75997600if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {7601llvm::Value *Offset = CGF.Builder.CreateIntCast(7602CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,7603/*isSigned=*/false);7604CurOffsets.push_back(Offset);7605CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));7606CurStrides.push_back(CurStrides.back());7607continue;7608}76097610const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);76117612if (!OASE)7613continue;76147615// Offset7616const Expr *OffsetExpr = OASE->getLowerBound();7617llvm::Value *Offset = nullptr;7618if (!OffsetExpr) {7619// If offset is absent, then we just set it to zero.7620Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);7621} else {7622Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),7623CGF.Int64Ty,7624/*isSigned=*/false);7625}7626CurOffsets.push_back(Offset);76277628// Count7629const Expr *CountExpr = OASE->getLength();7630llvm::Value *Count = nullptr;7631if (!CountExpr) {7632// In Clang, once a high dimension is an 
array section, we construct all7633// the lower dimension as array section, however, for case like7634// arr[0:2][2], Clang construct the inner dimension as an array section7635// but it actually is not in an array section form according to spec.7636if (!OASE->getColonLocFirst().isValid() &&7637!OASE->getColonLocSecond().isValid()) {7638Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);7639} else {7640// OpenMP 5.0, 2.1.5 Array Sections, Description.7641// When the length is absent it defaults to ⌈(size −7642// lower-bound)/stride⌉, where size is the size of the array7643// dimension.7644const Expr *StrideExpr = OASE->getStride();7645llvm::Value *Stride =7646StrideExpr7647? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),7648CGF.Int64Ty, /*isSigned=*/false)7649: nullptr;7650if (Stride)7651Count = CGF.Builder.CreateUDiv(7652CGF.Builder.CreateNUWSub(*DI, Offset), Stride);7653else7654Count = CGF.Builder.CreateNUWSub(*DI, Offset);7655}7656} else {7657Count = CGF.EmitScalarExpr(CountExpr);7658}7659Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);7660CurCounts.push_back(Count);76617662// Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size7663// Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:7664// Offset Count Stride7665// D0 0 1 4 (int) <- dummy dimension7666// D1 0 2 8 (2 * (1) * 4)7667// D2 1 2 20 (1 * (1 * 5) * 4)7668// D3 0 2 200 (2 * (1 * 5 * 4) * 4)7669const Expr *StrideExpr = OASE->getStride();7670llvm::Value *Stride =7671StrideExpr7672? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),7673CGF.Int64Ty, /*isSigned=*/false)7674: nullptr;7675DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));7676if (Stride)7677CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));7678else7679CurStrides.push_back(DimProd);7680if (DI != DimSizes.end())7681++DI;7682}76837684CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);7685CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);7686CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);7687}76887689/// Return the adjusted map modifiers if the declaration a capture refers to7690/// appears in a first-private clause. This is expected to be used only with7691/// directives that start with 'target'.7692OpenMPOffloadMappingFlags7693getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {7694assert(Cap.capturesVariable() && "Expected capture by reference only!");76957696// A first private variable captured by reference will use only the7697// 'private ptr' and 'map to' flag. 
Return the right flags if the captured7698// declaration is known as first-private in this handler.7699if (FirstPrivateDecls.count(Cap.getCapturedVar())) {7700if (Cap.getCapturedVar()->getType()->isAnyPointerType())7701return OpenMPOffloadMappingFlags::OMP_MAP_TO |7702OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;7703return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |7704OpenMPOffloadMappingFlags::OMP_MAP_TO;7705}7706auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());7707if (I != LambdasMap.end())7708// for map(to: lambda): using user specified map type.7709return getMapTypeBits(7710I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),7711/*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),7712/*AddPtrFlag=*/false,7713/*AddIsTargetParamFlag=*/false,7714/*isNonContiguous=*/false);7715return OpenMPOffloadMappingFlags::OMP_MAP_TO |7716OpenMPOffloadMappingFlags::OMP_MAP_FROM;7717}77187719void getPlainLayout(const CXXRecordDecl *RD,7720llvm::SmallVectorImpl<const FieldDecl *> &Layout,7721bool AsBase) const {7722const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);77237724llvm::StructType *St =7725AsBase ? 
RL.getBaseSubobjectLLVMType() : RL.getLLVMType();77267727unsigned NumElements = St->getNumElements();7728llvm::SmallVector<7729llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>7730RecordLayout(NumElements);77317732// Fill bases.7733for (const auto &I : RD->bases()) {7734if (I.isVirtual())7735continue;77367737QualType BaseTy = I.getType();7738const auto *Base = BaseTy->getAsCXXRecordDecl();7739// Ignore empty bases.7740if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||7741CGF.getContext()7742.getASTRecordLayout(Base)7743.getNonVirtualSize()7744.isZero())7745continue;77467747unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);7748RecordLayout[FieldIndex] = Base;7749}7750// Fill in virtual bases.7751for (const auto &I : RD->vbases()) {7752QualType BaseTy = I.getType();7753// Ignore empty bases.7754if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))7755continue;77567757const auto *Base = BaseTy->getAsCXXRecordDecl();7758unsigned FieldIndex = RL.getVirtualBaseIndex(Base);7759if (RecordLayout[FieldIndex])7760continue;7761RecordLayout[FieldIndex] = Base;7762}7763// Fill in all the fields.7764assert(!RD->isUnion() && "Unexpected union.");7765for (const auto *Field : RD->fields()) {7766// Fill in non-bitfields. 
(Bitfields always use a zero pattern, which we7767// will fill in later.)7768if (!Field->isBitField() &&7769!isEmptyFieldForLayout(CGF.getContext(), Field)) {7770unsigned FieldIndex = RL.getLLVMFieldNo(Field);7771RecordLayout[FieldIndex] = Field;7772}7773}7774for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>7775&Data : RecordLayout) {7776if (Data.isNull())7777continue;7778if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())7779getPlainLayout(Base, Layout, /*AsBase=*/true);7780else7781Layout.push_back(Data.get<const FieldDecl *>());7782}7783}77847785/// Generate all the base pointers, section pointers, sizes, map types, and7786/// mappers for the extracted mappable expressions (all included in \a7787/// CombinedInfo). Also, for each item that relates with a device pointer, a7788/// pair of the relevant declaration and index where it occurs is appended to7789/// the device pointers info array.7790void generateAllInfoForClauses(7791ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,7792llvm::OpenMPIRBuilder &OMPBuilder,7793const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =7794llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {7795// We have to process the component lists that relate with the same7796// declaration in a single chunk so that we can generate the map flags7797// correctly. 
Therefore, we organize all lists in a map.7798enum MapKind { Present, Allocs, Other, Total };7799llvm::MapVector<CanonicalDeclPtr<const Decl>,7800SmallVector<SmallVector<MapInfo, 8>, 4>>7801Info;78027803// Helper function to fill the information map for the different supported7804// clauses.7805auto &&InfoGen =7806[&Info, &SkipVarSet](7807const ValueDecl *D, MapKind Kind,7808OMPClauseMappableExprCommon::MappableExprComponentListRef L,7809OpenMPMapClauseKind MapType,7810ArrayRef<OpenMPMapModifierKind> MapModifiers,7811ArrayRef<OpenMPMotionModifierKind> MotionModifiers,7812bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,7813const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {7814if (SkipVarSet.contains(D))7815return;7816auto It = Info.find(D);7817if (It == Info.end())7818It = Info7819.insert(std::make_pair(7820D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))7821.first;7822It->second[Kind].emplace_back(7823L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,7824IsImplicit, Mapper, VarRef, ForDeviceAddr);7825};78267827for (const auto *Cl : Clauses) {7828const auto *C = dyn_cast<OMPMapClause>(Cl);7829if (!C)7830continue;7831MapKind Kind = Other;7832if (llvm::is_contained(C->getMapTypeModifiers(),7833OMPC_MAP_MODIFIER_present))7834Kind = Present;7835else if (C->getMapType() == OMPC_MAP_alloc)7836Kind = Allocs;7837const auto *EI = C->getVarRefs().begin();7838for (const auto L : C->component_lists()) {7839const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr;7840InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),7841C->getMapTypeModifiers(), std::nullopt,7842/*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),7843E);7844++EI;7845}7846}7847for (const auto *Cl : Clauses) {7848const auto *C = dyn_cast<OMPToClause>(Cl);7849if (!C)7850continue;7851MapKind Kind = Other;7852if (llvm::is_contained(C->getMotionModifiers(),7853OMPC_MOTION_MODIFIER_present))7854Kind = Present;7855const auto *EI = C->getVarRefs().begin();7856for (const auto L : C->component_lists()) {7857InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,7858C->getMotionModifiers(), /*ReturnDevicePointer=*/false,7859C->isImplicit(), std::get<2>(L), *EI);7860++EI;7861}7862}7863for (const auto *Cl : Clauses) {7864const auto *C = dyn_cast<OMPFromClause>(Cl);7865if (!C)7866continue;7867MapKind Kind = Other;7868if (llvm::is_contained(C->getMotionModifiers(),7869OMPC_MOTION_MODIFIER_present))7870Kind = Present;7871const auto *EI = C->getVarRefs().begin();7872for (const auto L : C->component_lists()) {7873InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,7874std::nullopt, C->getMotionModifiers(),7875/*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),7876*EI);7877++EI;7878}7879}78807881// Look at the use_device_ptr and use_device_addr clauses information and7882// mark the existing map entries as such. If there is no map information for7883// an entry in the use_device_ptr and use_device_addr list, we create one7884// with map type 'alloc' and zero size section. It is the user fault if that7885// was not mapped before. 
If there is no map information and the pointer is7886// a struct member, then we defer the emission of that entry until the whole7887// struct has been processed.7888llvm::MapVector<CanonicalDeclPtr<const Decl>,7889SmallVector<DeferredDevicePtrEntryTy, 4>>7890DeferredInfo;7891MapCombinedInfoTy UseDeviceDataCombinedInfo;78927893auto &&UseDeviceDataCombinedInfoGen =7894[&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,7895CodeGenFunction &CGF, bool IsDevAddr) {7896UseDeviceDataCombinedInfo.Exprs.push_back(VD);7897UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);7898UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);7899UseDeviceDataCombinedInfo.DevicePointers.emplace_back(7900IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);7901UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);7902UseDeviceDataCombinedInfo.Sizes.push_back(7903llvm::Constant::getNullValue(CGF.Int64Ty));7904UseDeviceDataCombinedInfo.Types.push_back(7905OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);7906UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);7907};79087909auto &&MapInfoGen =7910[&DeferredInfo, &UseDeviceDataCombinedInfoGen,7911&InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,7912OMPClauseMappableExprCommon::MappableExprComponentListRef7913Components,7914bool IsImplicit, bool IsDevAddr) {7915// We didn't find any match in our map information - generate a zero7916// size array section - if the pointer is a struct member we defer7917// this action until the whole struct has been processed.7918if (isa<MemberExpr>(IE)) {7919// Insert the pointer into Info to be processed by7920// generateInfoForComponentList. Because it is a member pointer7921// without a pointee, no entry will be generated for it, therefore7922// we need to generate one after the whole struct has been7923// processed. 
Nonetheless, generateInfoForComponentList must be7924// called to take the pointer into account for the calculation of7925// the range of the partial struct.7926InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,7927std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,7928nullptr, nullptr, IsDevAddr);7929DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);7930} else {7931llvm::Value *Ptr;7932if (IsDevAddr) {7933if (IE->isGLValue())7934Ptr = CGF.EmitLValue(IE).getPointer(CGF);7935else7936Ptr = CGF.EmitScalarExpr(IE);7937} else {7938Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());7939}7940UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);7941}7942};79437944auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,7945const Expr *IE, bool IsDevAddr) -> bool {7946// We potentially have map information for this declaration already.7947// Look for the first set of components that refer to it. If found,7948// return true.7949// If the first component is a member expression, we have to look into7950// 'this', which maps to null in the map of map information. Otherwise7951// look directly for the information.7952auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);7953if (It != Info.end()) {7954bool Found = false;7955for (auto &Data : It->second) {7956auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {7957return MI.Components.back().getAssociatedDeclaration() == VD;7958});7959// If we found a map entry, signal that the pointer has to be7960// returned and move on to the next declaration. Exclude cases where7961// the base pointer is mapped as array subscript, array section or7962// array shaping. 
The base address is passed as a pointer to base in7963// this case and cannot be used as a base for use_device_ptr list7964// item.7965if (CI != Data.end()) {7966if (IsDevAddr) {7967CI->ForDeviceAddr = IsDevAddr;7968CI->ReturnDevicePointer = true;7969Found = true;7970break;7971} else {7972auto PrevCI = std::next(CI->Components.rbegin());7973const auto *VarD = dyn_cast<VarDecl>(VD);7974if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||7975isa<MemberExpr>(IE) ||7976!VD->getType().getNonReferenceType()->isPointerType() ||7977PrevCI == CI->Components.rend() ||7978isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||7979VarD->hasLocalStorage()) {7980CI->ForDeviceAddr = IsDevAddr;7981CI->ReturnDevicePointer = true;7982Found = true;7983break;7984}7985}7986}7987}7988return Found;7989}7990return false;7991};79927993// Look at the use_device_ptr clause information and mark the existing map7994// entries as such. If there is no map information for an entry in the7995// use_device_ptr list, we create one with map type 'alloc' and zero size7996// section. It is the user fault if that was not mapped before. 
If there is7997// no map information and the pointer is a struct member, then we defer the7998// emission of that entry until the whole struct has been processed.7999for (const auto *Cl : Clauses) {8000const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);8001if (!C)8002continue;8003for (const auto L : C->component_lists()) {8004OMPClauseMappableExprCommon::MappableExprComponentListRef Components =8005std::get<1>(L);8006assert(!Components.empty() &&8007"Not expecting empty list of components!");8008const ValueDecl *VD = Components.back().getAssociatedDeclaration();8009VD = cast<ValueDecl>(VD->getCanonicalDecl());8010const Expr *IE = Components.back().getAssociatedExpression();8011if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))8012continue;8013MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),8014/*IsDevAddr=*/false);8015}8016}80178018llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;8019for (const auto *Cl : Clauses) {8020const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);8021if (!C)8022continue;8023for (const auto L : C->component_lists()) {8024OMPClauseMappableExprCommon::MappableExprComponentListRef Components =8025std::get<1>(L);8026assert(!std::get<1>(L).empty() &&8027"Not expecting empty list of components!");8028const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();8029if (!Processed.insert(VD).second)8030continue;8031VD = cast<ValueDecl>(VD->getCanonicalDecl());8032const Expr *IE = std::get<1>(L).back().getAssociatedExpression();8033if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))8034continue;8035MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),8036/*IsDevAddr=*/true);8037}8038}80398040for (const auto &Data : Info) {8041StructRangeInfoTy PartialStruct;8042// Current struct information:8043MapCombinedInfoTy CurInfo;8044// Current struct base information:8045MapCombinedInfoTy StructBaseCurInfo;8046const Decl *D = Data.first;8047const ValueDecl *VD = cast_or_null<ValueDecl>(D);8048bool HasMapBasePtr = false;8049bool 
HasMapArraySec = false;8050if (VD && VD->getType()->isAnyPointerType()) {8051for (const auto &M : Data.second) {8052HasMapBasePtr = any_of(M, [](const MapInfo &L) {8053return isa_and_present<DeclRefExpr>(L.VarRef);8054});8055HasMapArraySec = any_of(M, [](const MapInfo &L) {8056return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(8057L.VarRef);8058});8059if (HasMapBasePtr && HasMapArraySec)8060break;8061}8062}8063for (const auto &M : Data.second) {8064for (const MapInfo &L : M) {8065assert(!L.Components.empty() &&8066"Not expecting declaration with no component lists.");80678068// Remember the current base pointer index.8069unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();8070unsigned StructBasePointersIdx =8071StructBaseCurInfo.BasePointers.size();8072CurInfo.NonContigInfo.IsNonContiguous =8073L.Components.back().isNonContiguous();8074generateInfoForComponentList(8075L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,8076CurInfo, StructBaseCurInfo, PartialStruct,8077/*IsFirstComponentList=*/false, L.IsImplicit,8078/*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,8079L.VarRef, /*OverlappedElements*/ std::nullopt,8080HasMapBasePtr && HasMapArraySec);80818082// If this entry relates to a device pointer, set the relevant8083// declaration and add the 'return pointer' flag.8084if (L.ReturnDevicePointer) {8085// Check whether a value was added to either CurInfo or8086// StructBaseCurInfo and error if no value was added to either of8087// them:8088assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||8089StructBasePointersIdx <8090StructBaseCurInfo.BasePointers.size()) &&8091"Unexpected number of mapped base pointers.");80928093// Choose a base pointer index which is always valid:8094const ValueDecl *RelevantVD =8095L.Components.back().getAssociatedDeclaration();8096assert(RelevantVD &&8097"No relevant declaration related with device pointer??");80988099// If StructBaseCurInfo has been updated this iteration then work 
on8100// the first new entry added to it i.e. make sure that when multiple8101// values are added to any of the lists, the first value added is8102// being modified by the assignments below (not the last value8103// added).8104if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {8105StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =8106RelevantVD;8107StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =8108L.ForDeviceAddr ? DeviceInfoTy::Address8109: DeviceInfoTy::Pointer;8110StructBaseCurInfo.Types[StructBasePointersIdx] |=8111OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;8112} else {8113CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;8114CurInfo.DevicePointers[CurrentBasePointersIdx] =8115L.ForDeviceAddr ? DeviceInfoTy::Address8116: DeviceInfoTy::Pointer;8117CurInfo.Types[CurrentBasePointersIdx] |=8118OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;8119}8120}8121}8122}81238124// Append any pending zero-length pointers which are struct members and8125// used with use_device_ptr or use_device_addr.8126auto CI = DeferredInfo.find(Data.first);8127if (CI != DeferredInfo.end()) {8128for (const DeferredDevicePtrEntryTy &L : CI->second) {8129llvm::Value *BasePtr;8130llvm::Value *Ptr;8131if (L.ForDeviceAddr) {8132if (L.IE->isGLValue())8133Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);8134else8135Ptr = this->CGF.EmitScalarExpr(L.IE);8136BasePtr = Ptr;8137// Entry is RETURN_PARAM. Also, set the placeholder value8138// MEMBER_OF=FFFF so that the entry is later updated with the8139// correct value of MEMBER_OF.8140CurInfo.Types.push_back(8141OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |8142OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);8143} else {8144BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);8145Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),8146L.IE->getExprLoc());8147// Entry is PTR_AND_OBJ and RETURN_PARAM. 
Also, set the8148// placeholder value MEMBER_OF=FFFF so that the entry is later8149// updated with the correct value of MEMBER_OF.8150CurInfo.Types.push_back(8151OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |8152OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |8153OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);8154}8155CurInfo.Exprs.push_back(L.VD);8156CurInfo.BasePointers.emplace_back(BasePtr);8157CurInfo.DevicePtrDecls.emplace_back(L.VD);8158CurInfo.DevicePointers.emplace_back(8159L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);8160CurInfo.Pointers.push_back(Ptr);8161CurInfo.Sizes.push_back(8162llvm::Constant::getNullValue(this->CGF.Int64Ty));8163CurInfo.Mappers.push_back(nullptr);8164}8165}81668167// Unify entries in one list making sure the struct mapping precedes the8168// individual fields:8169MapCombinedInfoTy UnionCurInfo;8170UnionCurInfo.append(StructBaseCurInfo);8171UnionCurInfo.append(CurInfo);81728173// If there is an entry in PartialStruct it means we have a struct with8174// individual members mapped. 
Emit an extra combined entry.8175if (PartialStruct.Base.isValid()) {8176UnionCurInfo.NonContigInfo.Dims.push_back(0);8177// Emit a combined entry:8178emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,8179/*IsMapThis*/ !VD, OMPBuilder, VD);8180}81818182// We need to append the results of this capture to what we already have.8183CombinedInfo.append(UnionCurInfo);8184}8185// Append data for use_device_ptr clauses.8186CombinedInfo.append(UseDeviceDataCombinedInfo);8187}81888189public:8190MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)8191: CurDir(&Dir), CGF(CGF) {8192// Extract firstprivate clause information.8193for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())8194for (const auto *D : C->varlists())8195FirstPrivateDecls.try_emplace(8196cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());8197// Extract implicit firstprivates from uses_allocators clauses.8198for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {8199for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {8200OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);8201if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))8202FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),8203/*Implicit=*/true);8204else if (const auto *VD = dyn_cast<VarDecl>(8205cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())8206->getDecl()))8207FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);8208}8209}8210// Extract device pointer clause information.8211for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())8212for (auto L : C->component_lists())8213DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));8214// Extract device addr clause information.8215for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())8216for (auto L : C->component_lists())8217HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));8218// Extract map information.8219for (const auto *C : 
Dir.getClausesOfKind<OMPMapClause>()) {8220if (C->getMapType() != OMPC_MAP_to)8221continue;8222for (auto L : C->component_lists()) {8223const ValueDecl *VD = std::get<0>(L);8224const auto *RD = VD ? VD->getType()8225.getCanonicalType()8226.getNonReferenceType()8227->getAsCXXRecordDecl()8228: nullptr;8229if (RD && RD->isLambda())8230LambdasMap.try_emplace(std::get<0>(L), C);8231}8232}8233}82348235/// Constructor for the declare mapper directive.8236MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)8237: CurDir(&Dir), CGF(CGF) {}82388239/// Generate code for the combined entry if we have a partially mapped struct8240/// and take care of the mapping flags of the arguments corresponding to8241/// individual struct members.8242void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,8243MapFlagsArrayTy &CurTypes,8244const StructRangeInfoTy &PartialStruct, bool IsMapThis,8245llvm::OpenMPIRBuilder &OMPBuilder,8246const ValueDecl *VD = nullptr,8247bool NotTargetParams = true) const {8248if (CurTypes.size() == 1 &&8249((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=8250OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&8251!PartialStruct.IsArraySection)8252return;8253Address LBAddr = PartialStruct.LowestElem.second;8254Address HBAddr = PartialStruct.HighestElem.second;8255if (PartialStruct.HasCompleteRecord) {8256LBAddr = PartialStruct.LB;8257HBAddr = PartialStruct.LB;8258}8259CombinedInfo.Exprs.push_back(VD);8260// Base is the base of the struct8261CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));8262CombinedInfo.DevicePtrDecls.push_back(nullptr);8263CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);8264// Pointer is the address of the lowest element8265llvm::Value *LB = LBAddr.emitRawPointer(CGF);8266const CXXMethodDecl *MD =8267CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;8268const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;8269bool HasBaseClass = RD && IsMapThis ? 
RD->getNumBases() > 0 : false;8270// There should not be a mapper for a combined entry.8271if (HasBaseClass) {8272// OpenMP 5.2 148:21:8273// If the target construct is within a class non-static member function,8274// and a variable is an accessible data member of the object for which the8275// non-static data member function is invoked, the variable is treated as8276// if the this[:1] expression had appeared in a map clause with a map-type8277// of tofrom.8278// Emit this[:1]8279CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));8280QualType Ty = MD->getFunctionObjectParameterType();8281llvm::Value *Size =8282CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,8283/*isSigned=*/true);8284CombinedInfo.Sizes.push_back(Size);8285} else {8286CombinedInfo.Pointers.push_back(LB);8287// Size is (addr of {highest+1} element) - (addr of lowest element)8288llvm::Value *HB = HBAddr.emitRawPointer(CGF);8289llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(8290HBAddr.getElementType(), HB, /*Idx0=*/1);8291llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);8292llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);8293llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);8294llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,8295/*isSigned=*/false);8296CombinedInfo.Sizes.push_back(Size);8297}8298CombinedInfo.Mappers.push_back(nullptr);8299// Map type is always TARGET_PARAM, if generate info for captures.8300CombinedInfo.Types.push_back(8301NotTargetParams ? 
OpenMPOffloadMappingFlags::OMP_MAP_NONE8302: OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);8303// If any element has the present modifier, then make sure the runtime8304// doesn't attempt to allocate the struct.8305if (CurTypes.end() !=8306llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {8307return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(8308Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);8309}))8310CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;8311// Remove TARGET_PARAM flag from the first element8312(*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;8313// If any element has the ompx_hold modifier, then make sure the runtime8314// uses the hold reference count for the struct as a whole so that it won't8315// be unmapped by an extra dynamic reference count decrement. Add it to all8316// elements as well so the runtime knows which reference count to check8317// when determining whether it's time for device-to-host transfers of8318// individual elements.8319if (CurTypes.end() !=8320llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {8321return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(8322Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);8323})) {8324CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;8325for (auto &M : CurTypes)8326M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;8327}83288329// All other current entries will be MEMBER_OF the combined entry8330// (except for PTR_AND_OBJ entries which do not have a placeholder value8331// 0xFFFF in the MEMBER_OF field).8332OpenMPOffloadMappingFlags MemberOfFlag =8333OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);8334for (auto &M : CurTypes)8335OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);8336}83378338/// Generate all the base pointers, section pointers, sizes, map types, and8339/// mappers for the extracted mappable expressions (all included in 
\a8340/// CombinedInfo). Also, for each item that relates with a device pointer, a8341/// pair of the relevant declaration and index where it occurs is appended to8342/// the device pointers info array.8343void generateAllInfo(8344MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,8345const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =8346llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {8347assert(CurDir.is<const OMPExecutableDirective *>() &&8348"Expect a executable directive");8349const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();8350generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,8351SkipVarSet);8352}83538354/// Generate all the base pointers, section pointers, sizes, map types, and8355/// mappers for the extracted map clauses of user-defined mapper (all included8356/// in \a CombinedInfo).8357void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,8358llvm::OpenMPIRBuilder &OMPBuilder) const {8359assert(CurDir.is<const OMPDeclareMapperDecl *>() &&8360"Expect a declare mapper directive");8361const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();8362generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,8363OMPBuilder);8364}83658366/// Emit capture info for lambdas for variables captured by reference.8367void generateInfoForLambdaCaptures(8368const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,8369llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {8370QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();8371const auto *RD = VDType->getAsCXXRecordDecl();8372if (!RD || !RD->isLambda())8373return;8374Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),8375CGF.getContext().getDeclAlign(VD));8376LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);8377llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;8378FieldDecl *ThisCapture = nullptr;8379RD->getCaptureFields(Captures, ThisCapture);8380if 
(ThisCapture) {8381LValue ThisLVal =8382CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);8383LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);8384LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),8385VDLVal.getPointer(CGF));8386CombinedInfo.Exprs.push_back(VD);8387CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));8388CombinedInfo.DevicePtrDecls.push_back(nullptr);8389CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);8390CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));8391CombinedInfo.Sizes.push_back(8392CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),8393CGF.Int64Ty, /*isSigned=*/true));8394CombinedInfo.Types.push_back(8395OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |8396OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |8397OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |8398OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);8399CombinedInfo.Mappers.push_back(nullptr);8400}8401for (const LambdaCapture &LC : RD->captures()) {8402if (!LC.capturesVariable())8403continue;8404const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());8405if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())8406continue;8407auto It = Captures.find(VD);8408assert(It != Captures.end() && "Found lambda capture without field.");8409LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);8410if (LC.getCaptureKind() == LCK_ByRef) {8411LValue VarLValVal = CGF.EmitLValueForField(VDLVal, 
It->second);8412LambdaPointers.try_emplace(VarLVal.getPointer(CGF),8413VDLVal.getPointer(CGF));8414CombinedInfo.Exprs.push_back(VD);8415CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));8416CombinedInfo.DevicePtrDecls.push_back(nullptr);8417CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);8418CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));8419CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(8420CGF.getTypeSize(8421VD->getType().getCanonicalType().getNonReferenceType()),8422CGF.Int64Ty, /*isSigned=*/true));8423} else {8424RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());8425LambdaPointers.try_emplace(VarLVal.getPointer(CGF),8426VDLVal.getPointer(CGF));8427CombinedInfo.Exprs.push_back(VD);8428CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));8429CombinedInfo.DevicePtrDecls.push_back(nullptr);8430CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);8431CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());8432CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));8433}8434CombinedInfo.Types.push_back(8435OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |8436OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |8437OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |8438OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);8439CombinedInfo.Mappers.push_back(nullptr);8440}8441}84428443/// Set correct indices for lambdas captures.8444void adjustMemberOfForLambdaCaptures(8445llvm::OpenMPIRBuilder &OMPBuilder,8446const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,8447MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,8448MapFlagsArrayTy &Types) const {8449for (unsigned I = 0, E = Types.size(); I < E; ++I) {8450// Set correct member_of idx for all implicit lambda captures.8451if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |8452OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |8453OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF 
|8454OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))8455continue;8456llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);8457assert(BasePtr && "Unable to find base lambda address.");8458int TgtIdx = -1;8459for (unsigned J = I; J > 0; --J) {8460unsigned Idx = J - 1;8461if (Pointers[Idx] != BasePtr)8462continue;8463TgtIdx = Idx;8464break;8465}8466assert(TgtIdx != -1 && "Unable to find parent lambda.");8467// All other current entries will be MEMBER_OF the combined entry8468// (except for PTR_AND_OBJ entries which do not have a placeholder value8469// 0xFFFF in the MEMBER_OF field).8470OpenMPOffloadMappingFlags MemberOfFlag =8471OMPBuilder.getMemberOfFlag(TgtIdx);8472OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);8473}8474}84758476/// Generate the base pointers, section pointers, sizes, map types, and8477/// mappers associated to a given capture (all included in \a CombinedInfo).8478void generateInfoForCapture(const CapturedStmt::Capture *Cap,8479llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,8480StructRangeInfoTy &PartialStruct) const {8481assert(!Cap->capturesVariableArrayType() &&8482"Not expecting to generate map info for a variable array type!");84838484// We need to know when we generating information for the first component8485const ValueDecl *VD = Cap->capturesThis()8486? nullptr8487: Cap->getCapturedVar()->getCanonicalDecl();84888489// for map(to: lambda): skip here, processing it in8490// generateDefaultMapInfo8491if (LambdasMap.count(VD))8492return;84938494// If this declaration appears in a is_device_ptr clause we just have to8495// pass the pointer by value. 
If it is a reference to a declaration, we just8496// pass its value.8497if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {8498CombinedInfo.Exprs.push_back(VD);8499CombinedInfo.BasePointers.emplace_back(Arg);8500CombinedInfo.DevicePtrDecls.emplace_back(VD);8501CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);8502CombinedInfo.Pointers.push_back(Arg);8503CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(8504CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,8505/*isSigned=*/true));8506CombinedInfo.Types.push_back(8507OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |8508OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);8509CombinedInfo.Mappers.push_back(nullptr);8510return;8511}85128513using MapData =8514std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,8515OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,8516const ValueDecl *, const Expr *>;8517SmallVector<MapData, 4> DeclComponentLists;8518// For member fields list in is_device_ptr, store it in8519// DeclComponentLists for generating components info.8520static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;8521auto It = DevPointersMap.find(VD);8522if (It != DevPointersMap.end())8523for (const auto &MCL : It->second)8524DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,8525/*IsImpicit = */ true, nullptr,8526nullptr);8527auto I = HasDevAddrsMap.find(VD);8528if (I != HasDevAddrsMap.end())8529for (const auto &MCL : I->second)8530DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,8531/*IsImpicit = */ true, nullptr,8532nullptr);8533assert(CurDir.is<const OMPExecutableDirective *>() &&8534"Expect a executable directive");8535const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();8536bool HasMapBasePtr = false;8537bool HasMapArraySec = false;8538for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {8539const auto *EI = C->getVarRefs().begin();8540for (const auto L : C->decl_component_lists(VD)) 
{8541const ValueDecl *VDecl, *Mapper;8542// The Expression is not correct if the mapping is implicit8543const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;8544OMPClauseMappableExprCommon::MappableExprComponentListRef Components;8545std::tie(VDecl, Components, Mapper) = L;8546assert(VDecl == VD && "We got information for the wrong declaration??");8547assert(!Components.empty() &&8548"Not expecting declaration with no component lists.");8549if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))8550HasMapBasePtr = true;8551if (VD && E && VD->getType()->isAnyPointerType() &&8552(isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))8553HasMapArraySec = true;8554DeclComponentLists.emplace_back(Components, C->getMapType(),8555C->getMapTypeModifiers(),8556C->isImplicit(), Mapper, E);8557++EI;8558}8559}8560llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,8561const MapData &RHS) {8562ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);8563OpenMPMapClauseKind MapType = std::get<1>(RHS);8564bool HasPresent =8565llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);8566bool HasAllocs = MapType == OMPC_MAP_alloc;8567MapModifiers = std::get<2>(RHS);8568MapType = std::get<1>(LHS);8569bool HasPresentR =8570llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);8571bool HasAllocsR = MapType == OMPC_MAP_alloc;8572return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);8573});85748575// Find overlapping elements (including the offset from the base element).8576llvm::SmallDenseMap<8577const MapData *,8578llvm::SmallVector<8579OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,85804>8581OverlappedData;8582size_t Count = 0;8583for (const MapData &L : DeclComponentLists) {8584OMPClauseMappableExprCommon::MappableExprComponentListRef Components;8585OpenMPMapClauseKind MapType;8586ArrayRef<OpenMPMapModifierKind> MapModifiers;8587bool IsImplicit;8588const ValueDecl *Mapper;8589const Expr 
*VarRef;8590std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =8591L;8592++Count;8593for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {8594OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;8595std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,8596VarRef) = L1;8597auto CI = Components.rbegin();8598auto CE = Components.rend();8599auto SI = Components1.rbegin();8600auto SE = Components1.rend();8601for (; CI != CE && SI != SE; ++CI, ++SI) {8602if (CI->getAssociatedExpression()->getStmtClass() !=8603SI->getAssociatedExpression()->getStmtClass())8604break;8605// Are we dealing with different variables/fields?8606if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())8607break;8608}8609// Found overlapping if, at least for one component, reached the head8610// of the components list.8611if (CI == CE || SI == SE) {8612// Ignore it if it is the same component.8613if (CI == CE && SI == SE)8614continue;8615const auto It = (SI == SE) ? CI : SI;8616// If one component is a pointer and another one is a kind of8617// dereference of this pointer (array subscript, section, dereference,8618// etc.), it is not an overlapping.8619// Same, if one component is a base and another component is a8620// dereferenced pointer memberexpr with the same base.8621if (!isa<MemberExpr>(It->getAssociatedExpression()) ||8622(std::prev(It)->getAssociatedDeclaration() &&8623std::prev(It)8624->getAssociatedDeclaration()8625->getType()8626->isPointerType()) ||8627(It->getAssociatedDeclaration() &&8628It->getAssociatedDeclaration()->getType()->isPointerType() &&8629std::next(It) != CE && std::next(It) != SE))8630continue;8631const MapData &BaseData = CI == CE ? L : L1;8632OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =8633SI == SE ? 
Components : Components1;8634auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);8635OverlappedElements.getSecond().push_back(SubData);8636}8637}8638}8639// Sort the overlapped elements for each item.8640llvm::SmallVector<const FieldDecl *, 4> Layout;8641if (!OverlappedData.empty()) {8642const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();8643const Type *OrigType = BaseType->getPointeeOrArrayElementType();8644while (BaseType != OrigType) {8645BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();8646OrigType = BaseType->getPointeeOrArrayElementType();8647}86488649if (const auto *CRD = BaseType->getAsCXXRecordDecl())8650getPlainLayout(CRD, Layout, /*AsBase=*/false);8651else {8652const auto *RD = BaseType->getAsRecordDecl();8653Layout.append(RD->field_begin(), RD->field_end());8654}8655}8656for (auto &Pair : OverlappedData) {8657llvm::stable_sort(8658Pair.getSecond(),8659[&Layout](8660OMPClauseMappableExprCommon::MappableExprComponentListRef First,8661OMPClauseMappableExprCommon::MappableExprComponentListRef8662Second) {8663auto CI = First.rbegin();8664auto CE = First.rend();8665auto SI = Second.rbegin();8666auto SE = Second.rend();8667for (; CI != CE && SI != SE; ++CI, ++SI) {8668if (CI->getAssociatedExpression()->getStmtClass() !=8669SI->getAssociatedExpression()->getStmtClass())8670break;8671// Are we dealing with different variables/fields?8672if (CI->getAssociatedDeclaration() !=8673SI->getAssociatedDeclaration())8674break;8675}86768677// Lists contain the same elements.8678if (CI == CE && SI == SE)8679return false;86808681// List with less elements is less than list with more elements.8682if (CI == CE || SI == SE)8683return CI == CE;86848685const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());8686const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());8687if (FD1->getParent() == FD2->getParent())8688return FD1->getFieldIndex() < FD2->getFieldIndex();8689const auto *It =8690llvm::find_if(Layout, 
[FD1, FD2](const FieldDecl *FD) {8691return FD == FD1 || FD == FD2;8692});8693return *It == FD1;8694});8695}86968697// Associated with a capture, because the mapping flags depend on it.8698// Go through all of the elements with the overlapped elements.8699bool IsFirstComponentList = true;8700MapCombinedInfoTy StructBaseCombinedInfo;8701for (const auto &Pair : OverlappedData) {8702const MapData &L = *Pair.getFirst();8703OMPClauseMappableExprCommon::MappableExprComponentListRef Components;8704OpenMPMapClauseKind MapType;8705ArrayRef<OpenMPMapModifierKind> MapModifiers;8706bool IsImplicit;8707const ValueDecl *Mapper;8708const Expr *VarRef;8709std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =8710L;8711ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>8712OverlappedComponents = Pair.getSecond();8713generateInfoForComponentList(8714MapType, MapModifiers, std::nullopt, Components, CombinedInfo,8715StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,8716IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,8717/*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);8718IsFirstComponentList = false;8719}8720// Go through other elements without overlapped elements.8721for (const MapData &L : DeclComponentLists) {8722OMPClauseMappableExprCommon::MappableExprComponentListRef Components;8723OpenMPMapClauseKind MapType;8724ArrayRef<OpenMPMapModifierKind> MapModifiers;8725bool IsImplicit;8726const ValueDecl *Mapper;8727const Expr *VarRef;8728std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =8729L;8730auto It = OverlappedData.find(&L);8731if (It == OverlappedData.end())8732generateInfoForComponentList(8733MapType, MapModifiers, std::nullopt, Components, CombinedInfo,8734StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,8735IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,8736/*ForDeviceAddr=*/false, VD, VarRef,8737/*OverlappedElements*/ std::nullopt,8738HasMapBasePtr && 
HasMapArraySec);8739IsFirstComponentList = false;8740}8741}87428743/// Generate the default map information for a given capture \a CI,8744/// record field declaration \a RI and captured value \a CV.8745void generateDefaultMapInfo(const CapturedStmt::Capture &CI,8746const FieldDecl &RI, llvm::Value *CV,8747MapCombinedInfoTy &CombinedInfo) const {8748bool IsImplicit = true;8749// Do the default mapping.8750if (CI.capturesThis()) {8751CombinedInfo.Exprs.push_back(nullptr);8752CombinedInfo.BasePointers.push_back(CV);8753CombinedInfo.DevicePtrDecls.push_back(nullptr);8754CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);8755CombinedInfo.Pointers.push_back(CV);8756const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());8757CombinedInfo.Sizes.push_back(8758CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),8759CGF.Int64Ty, /*isSigned=*/true));8760// Default map type.8761CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |8762OpenMPOffloadMappingFlags::OMP_MAP_FROM);8763} else if (CI.capturesVariableByCopy()) {8764const VarDecl *VD = CI.getCapturedVar();8765CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());8766CombinedInfo.BasePointers.push_back(CV);8767CombinedInfo.DevicePtrDecls.push_back(nullptr);8768CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);8769CombinedInfo.Pointers.push_back(CV);8770if (!RI.getType()->isAnyPointerType()) {8771// We have to signal to the runtime captures passed by value that are8772// not pointers.8773CombinedInfo.Types.push_back(8774OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);8775CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(8776CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));8777} else {8778// Pointers are implicitly mapped with a zero size and no flags8779// (other than first map that is added for all implicit 
maps).8780CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);8781CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));8782}8783auto I = FirstPrivateDecls.find(VD);8784if (I != FirstPrivateDecls.end())8785IsImplicit = I->getSecond();8786} else {8787assert(CI.capturesVariable() && "Expected captured reference.");8788const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());8789QualType ElementType = PtrTy->getPointeeType();8790CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(8791CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));8792// The default map type for a scalar/complex type is 'to' because by8793// default the value doesn't have to be retrieved. For an aggregate8794// type, the default is 'tofrom'.8795CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));8796const VarDecl *VD = CI.getCapturedVar();8797auto I = FirstPrivateDecls.find(VD);8798CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());8799CombinedInfo.BasePointers.push_back(CV);8800CombinedInfo.DevicePtrDecls.push_back(nullptr);8801CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);8802if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {8803Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(8804CV, ElementType, CGF.getContext().getDeclAlign(VD),8805AlignmentSource::Decl));8806CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));8807} else {8808CombinedInfo.Pointers.push_back(CV);8809}8810if (I != FirstPrivateDecls.end())8811IsImplicit = I->getSecond();8812}8813// Every default map produces a single argument which is a target parameter.8814CombinedInfo.Types.back() |=8815OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;88168817// Add flag stating this is an implicit map.8818if (IsImplicit)8819CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;88208821// No user-defined mapper for default mapping.8822CombinedInfo.Mappers.push_back(nullptr);8823}8824};8825} // 
anonymous namespace88268827// Try to extract the base declaration from a `this->x` expression if possible.8828static ValueDecl *getDeclFromThisExpr(const Expr *E) {8829if (!E)8830return nullptr;88318832if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))8833if (const MemberExpr *ME =8834dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))8835return ME->getMemberDecl();8836return nullptr;8837}88388839/// Emit a string constant containing the names of the values mapped to the8840/// offloading runtime library.8841llvm::Constant *8842emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,8843MappableExprsHandler::MappingExprInfo &MapExprs) {88448845uint32_t SrcLocStrSize;8846if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())8847return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);88488849SourceLocation Loc;8850if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {8851if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))8852Loc = VD->getLocation();8853else8854Loc = MapExprs.getMapExpr()->getExprLoc();8855} else {8856Loc = MapExprs.getMapDecl()->getLocation();8857}88588859std::string ExprName;8860if (MapExprs.getMapExpr()) {8861PrintingPolicy P(CGF.getContext().getLangOpts());8862llvm::raw_string_ostream OS(ExprName);8863MapExprs.getMapExpr()->printPretty(OS, nullptr, P);8864OS.flush();8865} else {8866ExprName = MapExprs.getMapDecl()->getNameAsString();8867}88688869PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);8870return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,8871PLoc.getLine(), PLoc.getColumn(),8872SrcLocStrSize);8873}88748875/// Emit the arrays used to pass the captures and map information to the8876/// offloading runtime library. 
If there is no map or capture information,8877/// return nullptr by reference.8878static void emitOffloadingArrays(8879CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,8880CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,8881bool IsNonContiguous = false) {8882CodeGenModule &CGM = CGF.CGM;88838884// Reset the array information.8885Info.clearArrayInfo();8886Info.NumberOfPtrs = CombinedInfo.BasePointers.size();88878888using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;8889InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),8890CGF.AllocaInsertPt->getIterator());8891InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),8892CGF.Builder.GetInsertPoint());88938894auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {8895return emitMappingInformation(CGF, OMPBuilder, MapExpr);8896};8897if (CGM.getCodeGenOpts().getDebugInfo() !=8898llvm::codegenoptions::NoDebugInfo) {8899CombinedInfo.Names.resize(CombinedInfo.Exprs.size());8900llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),8901FillInfoMap);8902}89038904auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {8905if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {8906Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);8907}8908};89098910auto CustomMapperCB = [&](unsigned int I) {8911llvm::Value *MFunc = nullptr;8912if (CombinedInfo.Mappers[I]) {8913Info.HasMapper = true;8914MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(8915cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));8916}8917return MFunc;8918};8919OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,8920/*IsNonContiguous=*/true, DeviceAddrCB,8921CustomMapperCB);8922}89238924/// Check for inner distribute directive.8925static const OMPExecutableDirective *8926getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {8927const auto *CS = D.getInnermostCapturedStmt();8928const auto *Body 
=8929CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);8930const Stmt *ChildStmt =8931CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);89328933if (const auto *NestedDir =8934dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {8935OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();8936switch (D.getDirectiveKind()) {8937case OMPD_target:8938// For now, treat 'target' with nested 'teams loop' as if it's8939// distributed (target teams distribute).8940if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)8941return NestedDir;8942if (DKind == OMPD_teams) {8943Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(8944/*IgnoreCaptured=*/true);8945if (!Body)8946return nullptr;8947ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);8948if (const auto *NND =8949dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {8950DKind = NND->getDirectiveKind();8951if (isOpenMPDistributeDirective(DKind))8952return NND;8953}8954}8955return nullptr;8956case OMPD_target_teams:8957if (isOpenMPDistributeDirective(DKind))8958return NestedDir;8959return nullptr;8960case OMPD_target_parallel:8961case OMPD_target_simd:8962case OMPD_target_parallel_for:8963case OMPD_target_parallel_for_simd:8964return nullptr;8965case OMPD_target_teams_distribute:8966case OMPD_target_teams_distribute_simd:8967case OMPD_target_teams_distribute_parallel_for:8968case OMPD_target_teams_distribute_parallel_for_simd:8969case OMPD_parallel:8970case OMPD_for:8971case OMPD_parallel_for:8972case OMPD_parallel_master:8973case OMPD_parallel_sections:8974case OMPD_for_simd:8975case OMPD_parallel_for_simd:8976case OMPD_cancel:8977case OMPD_cancellation_point:8978case OMPD_ordered:8979case OMPD_threadprivate:8980case OMPD_allocate:8981case OMPD_task:8982case OMPD_simd:8983case OMPD_tile:8984case OMPD_unroll:8985case OMPD_sections:8986case OMPD_section:8987case OMPD_single:8988case OMPD_master:8989case OMPD_critical:8990case OMPD_taskyield:8991case 
OMPD_barrier:8992case OMPD_taskwait:8993case OMPD_taskgroup:8994case OMPD_atomic:8995case OMPD_flush:8996case OMPD_depobj:8997case OMPD_scan:8998case OMPD_teams:8999case OMPD_target_data:9000case OMPD_target_exit_data:9001case OMPD_target_enter_data:9002case OMPD_distribute:9003case OMPD_distribute_simd:9004case OMPD_distribute_parallel_for:9005case OMPD_distribute_parallel_for_simd:9006case OMPD_teams_distribute:9007case OMPD_teams_distribute_simd:9008case OMPD_teams_distribute_parallel_for:9009case OMPD_teams_distribute_parallel_for_simd:9010case OMPD_target_update:9011case OMPD_declare_simd:9012case OMPD_declare_variant:9013case OMPD_begin_declare_variant:9014case OMPD_end_declare_variant:9015case OMPD_declare_target:9016case OMPD_end_declare_target:9017case OMPD_declare_reduction:9018case OMPD_declare_mapper:9019case OMPD_taskloop:9020case OMPD_taskloop_simd:9021case OMPD_master_taskloop:9022case OMPD_master_taskloop_simd:9023case OMPD_parallel_master_taskloop:9024case OMPD_parallel_master_taskloop_simd:9025case OMPD_requires:9026case OMPD_metadirective:9027case OMPD_unknown:9028default:9029llvm_unreachable("Unexpected directive.");9030}9031}90329033return nullptr;9034}90359036/// Emit the user-defined mapper function. 
The code generation follows the9037/// pattern in the example below.9038/// \code9039/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,9040/// void *base, void *begin,9041/// int64_t size, int64_t type,9042/// void *name = nullptr) {9043/// // Allocate space for an array section first or add a base/begin for9044/// // pointer dereference.9045/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&9046/// !maptype.IsDelete)9047/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,9048/// size*sizeof(Ty), clearToFromMember(type));9049/// // Map members.9050/// for (unsigned i = 0; i < size; i++) {9051/// // For each component specified by this mapper:9052/// for (auto c : begin[i]->all_components) {9053/// if (c.hasMapper())9054/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,9055/// c.arg_type, c.arg_name);9056/// else9057/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,9058/// c.arg_begin, c.arg_size, c.arg_type,9059/// c.arg_name);9060/// }9061/// }9062/// // Delete the array section.9063/// if (size > 1 && maptype.IsDelete)9064/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,9065/// size*sizeof(Ty), clearToFromMember(type));9066/// }9067/// \endcode9068void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,9069CodeGenFunction *CGF) {9070if (UDMMap.count(D) > 0)9071return;9072ASTContext &C = CGM.getContext();9073QualType Ty = D->getType();9074QualType PtrTy = C.getPointerType(Ty).withRestrict();9075QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);9076auto *MapperVarDecl =9077cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());9078SourceLocation Loc = D->getLocation();9079CharUnits ElementSize = C.getTypeSizeInChars(Ty);9080llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);90819082// Prepare mapper function arguments and attributes.9083ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, 
/*Id=*/nullptr,9084C.VoidPtrTy, ImplicitParamKind::Other);9085ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,9086ImplicitParamKind::Other);9087ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,9088C.VoidPtrTy, ImplicitParamKind::Other);9089ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,9090ImplicitParamKind::Other);9091ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,9092ImplicitParamKind::Other);9093ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,9094ImplicitParamKind::Other);9095FunctionArgList Args;9096Args.push_back(&HandleArg);9097Args.push_back(&BaseArg);9098Args.push_back(&BeginArg);9099Args.push_back(&SizeArg);9100Args.push_back(&TypeArg);9101Args.push_back(&NameArg);9102const CGFunctionInfo &FnInfo =9103CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);9104llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);9105SmallString<64> TyStr;9106llvm::raw_svector_ostream Out(TyStr);9107CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);9108std::string Name = getName({"omp_mapper", TyStr, D->getName()});9109auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,9110Name, &CGM.getModule());9111CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);9112Fn->removeFnAttr(llvm::Attribute::OptimizeNone);9113// Start the mapper function code generation.9114CodeGenFunction MapperCGF(CGM);9115MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);9116// Compute the starting and end addresses of array elements.9117llvm::Value *Size = MapperCGF.EmitLoadOfScalar(9118MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,9119C.getPointerType(Int64Ty), Loc);9120// Prepare common arguments for array initiation and deletion.9121llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(9122MapperCGF.GetAddrOfLocalVar(&HandleArg),9123/*Volatile=*/false, 
C.getPointerType(C.VoidPtrTy), Loc);9124llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(9125MapperCGF.GetAddrOfLocalVar(&BaseArg),9126/*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);9127llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(9128MapperCGF.GetAddrOfLocalVar(&BeginArg),9129/*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);9130// Convert the size in bytes into the number of array elements.9131Size = MapperCGF.Builder.CreateExactUDiv(9132Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));9133llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(9134BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));9135llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);9136llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(9137MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,9138C.getPointerType(Int64Ty), Loc);9139llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(9140MapperCGF.GetAddrOfLocalVar(&NameArg),9141/*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);91429143// Emit array initiation if this is an array section and \p MapType indicates9144// that memory allocation is required.9145llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");9146emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,9147MapName, ElementSize, HeadBB, /*IsInit=*/true);91489149// Emit a for loop to iterate through SizeArg of elements and map all of them.91509151// Emit the loop header block.9152MapperCGF.EmitBlock(HeadBB);9153llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");9154llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");9155// Evaluate whether the initial condition is satisfied.9156llvm::Value *IsEmpty =9157MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");9158MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);9159llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();91609161// Emit the loop body 
block.9162MapperCGF.EmitBlock(BodyBB);9163llvm::BasicBlock *LastBB = BodyBB;9164llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(9165PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");9166PtrPHI->addIncoming(PtrBegin, EntryBB);9167Address PtrCurrent(PtrPHI, ElemTy,9168MapperCGF.GetAddrOfLocalVar(&BeginArg)9169.getAlignment()9170.alignmentOfArrayElement(ElementSize));9171// Privatize the declared variable of mapper to be the current array element.9172CodeGenFunction::OMPPrivateScope Scope(MapperCGF);9173Scope.addPrivate(MapperVarDecl, PtrCurrent);9174(void)Scope.Privatize();91759176// Get map clause information. Fill up the arrays with all mapped variables.9177MappableExprsHandler::MapCombinedInfoTy Info;9178MappableExprsHandler MEHandler(*D, MapperCGF);9179MEHandler.generateAllInfoForMapper(Info, OMPBuilder);91809181// Call the runtime API __tgt_mapper_num_components to get the number of9182// pre-existing components.9183llvm::Value *OffloadingArgs[] = {Handle};9184llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(9185OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),9186OMPRTL___tgt_mapper_num_components),9187OffloadingArgs);9188llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(9189PreviousSize,9190MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));91919192// Fill up the runtime mapper handle for all components.9193for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {9194llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(9195Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));9196llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(9197Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));9198llvm::Value *CurSizeArg = Info.Sizes[I];9199llvm::Value *CurNameArg =9200(CGM.getCodeGenOpts().getDebugInfo() ==9201llvm::codegenoptions::NoDebugInfo)9202? 
llvm::ConstantPointerNull::get(CGM.VoidPtrTy)9203: emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);92049205// Extract the MEMBER_OF field from the map type.9206llvm::Value *OriMapType = MapperCGF.Builder.getInt64(9207static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9208Info.Types[I]));9209llvm::Value *MemberMapType =9210MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);92119212// Combine the map type inherited from user-defined mapper with that9213// specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM9214// bits of the \a MapType, which is the input argument of the mapper9215// function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM9216// bits of MemberMapType.9217// [OpenMP 5.0], 1.2.6. map-type decay.9218// | alloc | to | from | tofrom | release | delete9219// ----------------------------------------------------------9220// alloc | alloc | alloc | alloc | alloc | release | delete9221// to | alloc | to | alloc | to | release | delete9222// from | alloc | alloc | from | from | release | delete9223// tofrom | alloc | to | from | tofrom | release | delete9224llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(9225MapType,9226MapperCGF.Builder.getInt64(9227static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9228OpenMPOffloadMappingFlags::OMP_MAP_TO |9229OpenMPOffloadMappingFlags::OMP_MAP_FROM)));9230llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");9231llvm::BasicBlock *AllocElseBB =9232MapperCGF.createBasicBlock("omp.type.alloc.else");9233llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");9234llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");9235llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");9236llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");9237llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);9238MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, 
AllocElseBB);9239// In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.9240MapperCGF.EmitBlock(AllocBB);9241llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(9242MemberMapType,9243MapperCGF.Builder.getInt64(9244~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9245OpenMPOffloadMappingFlags::OMP_MAP_TO |9246OpenMPOffloadMappingFlags::OMP_MAP_FROM)));9247MapperCGF.Builder.CreateBr(EndBB);9248MapperCGF.EmitBlock(AllocElseBB);9249llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(9250LeftToFrom,9251MapperCGF.Builder.getInt64(9252static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9253OpenMPOffloadMappingFlags::OMP_MAP_TO)));9254MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);9255// In case of to, clear OMP_MAP_FROM.9256MapperCGF.EmitBlock(ToBB);9257llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(9258MemberMapType,9259MapperCGF.Builder.getInt64(9260~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9261OpenMPOffloadMappingFlags::OMP_MAP_FROM)));9262MapperCGF.Builder.CreateBr(EndBB);9263MapperCGF.EmitBlock(ToElseBB);9264llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(9265LeftToFrom,9266MapperCGF.Builder.getInt64(9267static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9268OpenMPOffloadMappingFlags::OMP_MAP_FROM)));9269MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);9270// In case of from, clear OMP_MAP_TO.9271MapperCGF.EmitBlock(FromBB);9272llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(9273MemberMapType,9274MapperCGF.Builder.getInt64(9275~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9276OpenMPOffloadMappingFlags::OMP_MAP_TO)));9277// In case of tofrom, do nothing.9278MapperCGF.EmitBlock(EndBB);9279LastBB = EndBB;9280llvm::PHINode *CurMapType =9281MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");9282CurMapType->addIncoming(AllocMapType, AllocBB);9283CurMapType->addIncoming(ToMapType, ToBB);9284CurMapType->addIncoming(FromMapType, 
FromBB);9285CurMapType->addIncoming(MemberMapType, ToElseBB);92869287llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,9288CurSizeArg, CurMapType, CurNameArg};9289if (Info.Mappers[I]) {9290// Call the corresponding mapper function.9291llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(9292cast<OMPDeclareMapperDecl>(Info.Mappers[I]));9293assert(MapperFunc && "Expect a valid mapper function is available.");9294MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);9295} else {9296// Call the runtime API __tgt_push_mapper_component to fill up the runtime9297// data structure.9298MapperCGF.EmitRuntimeCall(9299OMPBuilder.getOrCreateRuntimeFunction(9300CGM.getModule(), OMPRTL___tgt_push_mapper_component),9301OffloadingArgs);9302}9303}93049305// Update the pointer to point to the next element that needs to be mapped,9306// and check whether we have mapped all elements.9307llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(9308ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");9309PtrPHI->addIncoming(PtrNext, LastBB);9310llvm::Value *IsDone =9311MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");9312llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");9313MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);93149315MapperCGF.EmitBlock(ExitBB);9316// Emit array deletion if this is an array section and \p MapType indicates9317// that deletion is required.9318emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,9319MapName, ElementSize, DoneBB, /*IsInit=*/false);93209321// Emit the function exit block.9322MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);9323MapperCGF.FinishFunction();9324UDMMap.try_emplace(D, Fn);9325if (CGF) {9326auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);9327Decls.second.push_back(D);9328}9329}93309331/// Emit the array initialization or deletion portion for user-defined mapper9332/// code generation. 
First, it evaluates whether an array section is mapped and9333/// whether the \a MapType instructs to delete this section. If \a IsInit is9334/// true, and \a MapType indicates to not delete this array, array9335/// initialization code is generated. If \a IsInit is false, and \a MapType9336/// indicates to not this array, array deletion code is generated.9337void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(9338CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,9339llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,9340llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,9341bool IsInit) {9342StringRef Prefix = IsInit ? ".init" : ".del";93439344// Evaluate if this is an array section.9345llvm::BasicBlock *BodyBB =9346MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));9347llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(9348Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");9349llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(9350MapType,9351MapperCGF.Builder.getInt64(9352static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9353OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));9354llvm::Value *DeleteCond;9355llvm::Value *Cond;9356if (IsInit) {9357// base != begin?9358llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);9359// IsPtrAndObj?9360llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(9361MapType,9362MapperCGF.Builder.getInt64(9363static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9364OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));9365PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);9366BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);9367Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);9368DeleteCond = MapperCGF.Builder.CreateIsNull(9369DeleteBit, getName({"omp.array", Prefix, ".delete"}));9370} else {9371Cond = IsArray;9372DeleteCond = MapperCGF.Builder.CreateIsNotNull(9373DeleteBit, getName({"omp.array", 
Prefix, ".delete"}));9374}9375Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);9376MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);93779378MapperCGF.EmitBlock(BodyBB);9379// Get the array size by multiplying element size and element number (i.e., \p9380// Size).9381llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(9382Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));9383// Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves9384// memory allocation/deletion purpose only.9385llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(9386MapType,9387MapperCGF.Builder.getInt64(9388~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9389OpenMPOffloadMappingFlags::OMP_MAP_TO |9390OpenMPOffloadMappingFlags::OMP_MAP_FROM)));9391MapTypeArg = MapperCGF.Builder.CreateOr(9392MapTypeArg,9393MapperCGF.Builder.getInt64(9394static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(9395OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));93969397// Call the runtime API __tgt_push_mapper_component to fill up the runtime9398// data structure.9399llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,9400ArraySize, MapTypeArg, MapName};9401MapperCGF.EmitRuntimeCall(9402OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),9403OMPRTL___tgt_push_mapper_component),9404OffloadingArgs);9405}94069407llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(9408const OMPDeclareMapperDecl *D) {9409auto I = UDMMap.find(D);9410if (I != UDMMap.end())9411return I->second;9412emitUserDefinedMapper(D);9413return UDMMap.lookup(D);9414}94159416llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(9417CodeGenFunction &CGF, const OMPExecutableDirective &D,9418llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,9419const OMPLoopDirective &D)>9420SizeEmitter) {9421OpenMPDirectiveKind Kind = D.getDirectiveKind();9422const OMPExecutableDirective *TD = &D;9423// Get nested teams distribute kind directive, if any. 
For now, treat9424// 'target_teams_loop' as if it's really a target_teams_distribute.9425if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&9426Kind != OMPD_target_teams_loop)9427TD = getNestedDistributeDirective(CGM.getContext(), D);9428if (!TD)9429return llvm::ConstantInt::get(CGF.Int64Ty, 0);94309431const auto *LD = cast<OMPLoopDirective>(TD);9432if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))9433return NumIterations;9434return llvm::ConstantInt::get(CGF.Int64Ty, 0);9435}94369437static void9438emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,9439const OMPExecutableDirective &D,9440llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,9441bool RequiresOuterTask, const CapturedStmt &CS,9442bool OffloadingMandatory, CodeGenFunction &CGF) {9443if (OffloadingMandatory) {9444CGF.Builder.CreateUnreachable();9445} else {9446if (RequiresOuterTask) {9447CapturedVars.clear();9448CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);9449}9450OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,9451CapturedVars);9452}9453}94549455static llvm::Value *emitDeviceID(9456llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,9457CodeGenFunction &CGF) {9458// Emit device ID if any.9459llvm::Value *DeviceID;9460if (Device.getPointer()) {9461assert((Device.getInt() == OMPC_DEVICE_unknown ||9462Device.getInt() == OMPC_DEVICE_device_num) &&9463"Expected device_num modifier.");9464llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());9465DeviceID =9466CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);9467} else {9468DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);9469}9470return DeviceID;9471}94729473llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,9474CodeGenFunction &CGF) {9475llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);94769477if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {9478CodeGenFunction::RunCleanupsScope 
DynCGroupMemScope(CGF);9479llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(9480DynMemClause->getSize(), /*IgnoreResultAssign=*/true);9481DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,9482/*isSigned=*/false);9483}9484return DynCGroupMem;9485}94869487static void emitTargetCallKernelLaunch(9488CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,9489const OMPExecutableDirective &D,9490llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,9491const CapturedStmt &CS, bool OffloadingMandatory,9492llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,9493llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,9494llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,9495llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,9496const OMPLoopDirective &D)>9497SizeEmitter,9498CodeGenFunction &CGF, CodeGenModule &CGM) {9499llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();95009501// Fill up the arrays with all the captured variables.9502MappableExprsHandler::MapCombinedInfoTy CombinedInfo;95039504// Get mappable expression information.9505MappableExprsHandler MEHandler(D, CGF);9506llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;9507llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;95089509auto RI = CS.getCapturedRecordDecl()->field_begin();9510auto *CV = CapturedVars.begin();9511for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),9512CE = CS.capture_end();9513CI != CE; ++CI, ++RI, ++CV) {9514MappableExprsHandler::MapCombinedInfoTy CurInfo;9515MappableExprsHandler::StructRangeInfoTy PartialStruct;95169517// VLA sizes are passed to the outlined region by copy and do not have map9518// information associated.9519if (CI->capturesVariableArrayType()) 
{9520CurInfo.Exprs.push_back(nullptr);9521CurInfo.BasePointers.push_back(*CV);9522CurInfo.DevicePtrDecls.push_back(nullptr);9523CurInfo.DevicePointers.push_back(9524MappableExprsHandler::DeviceInfoTy::None);9525CurInfo.Pointers.push_back(*CV);9526CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(9527CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));9528// Copy to the device as an argument. No need to retrieve it.9529CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |9530OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |9531OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);9532CurInfo.Mappers.push_back(nullptr);9533} else {9534// If we have any information in the map clause, we use it, otherwise we9535// just do a default mapping.9536MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);9537if (!CI->capturesThis())9538MappedVarSet.insert(CI->getCapturedVar());9539else9540MappedVarSet.insert(nullptr);9541if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())9542MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);9543// Generate correct mapping for variables captured by reference in9544// lambdas.9545if (CI->capturesVariable())9546MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,9547CurInfo, LambdaPointers);9548}9549// We expect to have at least an element of information for this capture.9550assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&9551"Non-existing map pointer for capture!");9552assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&9553CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&9554CurInfo.BasePointers.size() == CurInfo.Types.size() &&9555CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&9556"Inconsistent map information sizes!");95579558// If there is an entry in PartialStruct it means we have a struct with9559// individual members mapped. 
Emit an extra combined entry.9560if (PartialStruct.Base.isValid()) {9561CombinedInfo.append(PartialStruct.PreliminaryMapData);9562MEHandler.emitCombinedEntry(9563CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),9564OMPBuilder, nullptr,9565!PartialStruct.PreliminaryMapData.BasePointers.empty());9566}95679568// We need to append the results of this capture to what we already have.9569CombinedInfo.append(CurInfo);9570}9571// Adjust MEMBER_OF flags for the lambdas captures.9572MEHandler.adjustMemberOfForLambdaCaptures(9573OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,9574CombinedInfo.Pointers, CombinedInfo.Types);9575// Map any list items in a map clause that were not captures because they9576// weren't referenced within the construct.9577MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);95789579CGOpenMPRuntime::TargetDataInfo Info;9580// Fill up the arrays and create the arguments.9581emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);9582bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=9583llvm::codegenoptions::NoDebugInfo;9584OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,9585EmitDebug,9586/*ForEndCall=*/false);95879588InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;9589InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,9590CGF.VoidPtrTy, CGM.getPointerAlign());9591InputInfo.PointersArray =9592Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());9593InputInfo.SizesArray =9594Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());9595InputInfo.MappersArray =9596Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());9597MapTypesArray = Info.RTArgs.MapTypesArray;9598MapNamesArray = Info.RTArgs.MapNamesArray;95999600auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,9601RequiresOuterTask, &CS, OffloadingMandatory, Device,9602OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,9603SizeEmitter](CodeGenFunction 
&CGF, PrePostActionTy &) {9604bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;96059606if (IsReverseOffloading) {9607// Reverse offloading is not supported, so just execute on the host.9608// FIXME: This fallback solution is incorrect since it ignores the9609// OMP_TARGET_OFFLOAD environment variable. Instead it would be better to9610// assert here and ensure SEMA emits an error.9611emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,9612RequiresOuterTask, CS, OffloadingMandatory, CGF);9613return;9614}96159616bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();9617unsigned NumTargetItems = InputInfo.NumberOfTargetItems;96189619llvm::Value *BasePointersArray =9620InputInfo.BasePointersArray.emitRawPointer(CGF);9621llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);9622llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);9623llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);96249625auto &&EmitTargetCallFallbackCB =9626[&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,9627OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)9628-> llvm::OpenMPIRBuilder::InsertPointTy {9629CGF.Builder.restoreIP(IP);9630emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,9631RequiresOuterTask, CS, OffloadingMandatory, CGF);9632return CGF.Builder.saveIP();9633};96349635llvm::Value *DeviceID = emitDeviceID(Device, CGF);9636llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);9637llvm::Value *NumThreads =9638OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);9639llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());9640llvm::Value *NumIterations =9641OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);9642llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);9643llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(9644CGF.AllocaInsertPt->getParent(), 
CGF.AllocaInsertPt->getIterator());96459646llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(9647BasePointersArray, PointersArray, SizesArray, MapTypesArray,9648nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);96499650llvm::OpenMPIRBuilder::TargetKernelArgs Args(9651NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,9652DynCGGroupMem, HasNoWait);96539654CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(9655CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,9656DeviceID, RTLoc, AllocaIP));9657};96589659if (RequiresOuterTask)9660CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);9661else9662OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);9663}96649665static void9666emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,9667const OMPExecutableDirective &D,9668llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,9669bool RequiresOuterTask, const CapturedStmt &CS,9670bool OffloadingMandatory, CodeGenFunction &CGF) {96719672// Notify that the host version must be executed.9673auto &&ElseGen =9674[&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,9675OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {9676emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,9677RequiresOuterTask, CS, OffloadingMandatory, CGF);9678};96799680if (RequiresOuterTask) {9681CodeGenFunction::OMPTargetDataInfo InputInfo;9682CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);9683} else {9684OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);9685}9686}96879688void CGOpenMPRuntime::emitTargetCall(9689CodeGenFunction &CGF, const OMPExecutableDirective &D,9690llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,9691llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,9692llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,9693const OMPLoopDirective &D)>9694SizeEmitter) {9695if 
(!CGF.HaveInsertPoint())9696return;96979698const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&9699CGM.getLangOpts().OpenMPOffloadMandatory;97009701assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");97029703const bool RequiresOuterTask =9704D.hasClausesOfKind<OMPDependClause>() ||9705D.hasClausesOfKind<OMPNowaitClause>() ||9706D.hasClausesOfKind<OMPInReductionClause>() ||9707(CGM.getLangOpts().OpenMP >= 51 &&9708needsTaskBasedThreadLimit(D.getDirectiveKind()) &&9709D.hasClausesOfKind<OMPThreadLimitClause>());9710llvm::SmallVector<llvm::Value *, 16> CapturedVars;9711const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);9712auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,9713PrePostActionTy &) {9714CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);9715};9716emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);97179718CodeGenFunction::OMPTargetDataInfo InputInfo;9719llvm::Value *MapTypesArray = nullptr;9720llvm::Value *MapNamesArray = nullptr;97219722auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,9723RequiresOuterTask, &CS, OffloadingMandatory, Device,9724OutlinedFnID, &InputInfo, &MapTypesArray,9725&MapNamesArray, SizeEmitter](CodeGenFunction &CGF,9726PrePostActionTy &) {9727emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,9728RequiresOuterTask, CS, OffloadingMandatory,9729Device, OutlinedFnID, InputInfo, MapTypesArray,9730MapNamesArray, SizeEmitter, CGF, CGM);9731};97329733auto &&TargetElseGen =9734[this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,9735OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {9736emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,9737CS, OffloadingMandatory, CGF);9738};97399740// If we have a target function ID it means that we need to support9741// offloading, otherwise, just execute on the host. 
We need to execute on host9742// regardless of the conditional in the if clause if, e.g., the user do not9743// specify target triples.9744if (OutlinedFnID) {9745if (IfCond) {9746emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);9747} else {9748RegionCodeGenTy ThenRCG(TargetThenGen);9749ThenRCG(CGF);9750}9751} else {9752RegionCodeGenTy ElseRCG(TargetElseGen);9753ElseRCG(CGF);9754}9755}97569757void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,9758StringRef ParentName) {9759if (!S)9760return;97619762// Codegen OMP target directives that offload compute to the device.9763bool RequiresDeviceCodegen =9764isa<OMPExecutableDirective>(S) &&9765isOpenMPTargetExecutionDirective(9766cast<OMPExecutableDirective>(S)->getDirectiveKind());97679768if (RequiresDeviceCodegen) {9769const auto &E = *cast<OMPExecutableDirective>(S);97709771llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(9772CGM, OMPBuilder, E.getBeginLoc(), ParentName);97739774// Is this a target region that should not be emitted as an entry point? 
If9775// so just signal we are done with this target region.9776if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))9777return;97789779switch (E.getDirectiveKind()) {9780case OMPD_target:9781CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,9782cast<OMPTargetDirective>(E));9783break;9784case OMPD_target_parallel:9785CodeGenFunction::EmitOMPTargetParallelDeviceFunction(9786CGM, ParentName, cast<OMPTargetParallelDirective>(E));9787break;9788case OMPD_target_teams:9789CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(9790CGM, ParentName, cast<OMPTargetTeamsDirective>(E));9791break;9792case OMPD_target_teams_distribute:9793CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(9794CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));9795break;9796case OMPD_target_teams_distribute_simd:9797CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(9798CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));9799break;9800case OMPD_target_parallel_for:9801CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(9802CGM, ParentName, cast<OMPTargetParallelForDirective>(E));9803break;9804case OMPD_target_parallel_for_simd:9805CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(9806CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));9807break;9808case OMPD_target_simd:9809CodeGenFunction::EmitOMPTargetSimdDeviceFunction(9810CGM, ParentName, cast<OMPTargetSimdDirective>(E));9811break;9812case OMPD_target_teams_distribute_parallel_for:9813CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(9814CGM, ParentName,9815cast<OMPTargetTeamsDistributeParallelForDirective>(E));9816break;9817case OMPD_target_teams_distribute_parallel_for_simd:9818CodeGenFunction::9819EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(9820CGM, ParentName,9821cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));9822break;9823case 
OMPD_target_teams_loop:9824CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(9825CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));9826break;9827case OMPD_target_parallel_loop:9828CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(9829CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));9830break;9831case OMPD_parallel:9832case OMPD_for:9833case OMPD_parallel_for:9834case OMPD_parallel_master:9835case OMPD_parallel_sections:9836case OMPD_for_simd:9837case OMPD_parallel_for_simd:9838case OMPD_cancel:9839case OMPD_cancellation_point:9840case OMPD_ordered:9841case OMPD_threadprivate:9842case OMPD_allocate:9843case OMPD_task:9844case OMPD_simd:9845case OMPD_tile:9846case OMPD_unroll:9847case OMPD_sections:9848case OMPD_section:9849case OMPD_single:9850case OMPD_master:9851case OMPD_critical:9852case OMPD_taskyield:9853case OMPD_barrier:9854case OMPD_taskwait:9855case OMPD_taskgroup:9856case OMPD_atomic:9857case OMPD_flush:9858case OMPD_depobj:9859case OMPD_scan:9860case OMPD_teams:9861case OMPD_target_data:9862case OMPD_target_exit_data:9863case OMPD_target_enter_data:9864case OMPD_distribute:9865case OMPD_distribute_simd:9866case OMPD_distribute_parallel_for:9867case OMPD_distribute_parallel_for_simd:9868case OMPD_teams_distribute:9869case OMPD_teams_distribute_simd:9870case OMPD_teams_distribute_parallel_for:9871case OMPD_teams_distribute_parallel_for_simd:9872case OMPD_target_update:9873case OMPD_declare_simd:9874case OMPD_declare_variant:9875case OMPD_begin_declare_variant:9876case OMPD_end_declare_variant:9877case OMPD_declare_target:9878case OMPD_end_declare_target:9879case OMPD_declare_reduction:9880case OMPD_declare_mapper:9881case OMPD_taskloop:9882case OMPD_taskloop_simd:9883case OMPD_master_taskloop:9884case OMPD_master_taskloop_simd:9885case OMPD_parallel_master_taskloop:9886case OMPD_parallel_master_taskloop_simd:9887case OMPD_requires:9888case OMPD_metadirective:9889case 
OMPD_unknown:9890default:9891llvm_unreachable("Unknown target directive for OpenMP device codegen.");9892}9893return;9894}98959896if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {9897if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())9898return;98999900scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);9901return;9902}99039904// If this is a lambda function, look into its body.9905if (const auto *L = dyn_cast<LambdaExpr>(S))9906S = L->getBody();99079908// Keep looking for target regions recursively.9909for (const Stmt *II : S->children())9910scanForTargetRegionsFunctions(II, ParentName);9911}99129913static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {9914std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =9915OMPDeclareTargetDeclAttr::getDeviceType(VD);9916if (!DevTy)9917return false;9918// Do not emit device_type(nohost) functions for the host.9919if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)9920return true;9921// Do not emit device_type(host) functions for the device.9922if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)9923return true;9924return false;9925}99269927bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {9928// If emitting code for the host, we do not process FD here. 
Instead we do9929// the normal code generation.9930if (!CGM.getLangOpts().OpenMPIsTargetDevice) {9931if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))9932if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),9933CGM.getLangOpts().OpenMPIsTargetDevice))9934return true;9935return false;9936}99379938const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());9939// Try to detect target regions in the function.9940if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {9941StringRef Name = CGM.getMangledName(GD);9942scanForTargetRegionsFunctions(FD->getBody(), Name);9943if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),9944CGM.getLangOpts().OpenMPIsTargetDevice))9945return true;9946}99479948// Do not to emit function if it is not marked as declare target.9949return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&9950AlreadyEmittedTargetDecls.count(VD) == 0;9951}99529953bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {9954if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),9955CGM.getLangOpts().OpenMPIsTargetDevice))9956return true;99579958if (!CGM.getLangOpts().OpenMPIsTargetDevice)9959return false;99609961// Check if there are Ctors/Dtors in this declaration and look for target9962// regions in it. 
We use the complete variant to produce the kernel name9963// mangling.9964QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();9965if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {9966for (const CXXConstructorDecl *Ctor : RD->ctors()) {9967StringRef ParentName =9968CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));9969scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);9970}9971if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {9972StringRef ParentName =9973CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));9974scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);9975}9976}99779978// Do not to emit variable if it is not marked as declare target.9979std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =9980OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(9981cast<VarDecl>(GD.getDecl()));9982if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||9983((*Res == OMPDeclareTargetDeclAttr::MT_To ||9984*Res == OMPDeclareTargetDeclAttr::MT_Enter) &&9985HasRequiresUnifiedSharedMemory)) {9986DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));9987return true;9988}9989return false;9990}99919992void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,9993llvm::Constant *Addr) {9994if (CGM.getLangOpts().OMPTargetTriples.empty() &&9995!CGM.getLangOpts().OpenMPIsTargetDevice)9996return;99979998std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =9999OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);1000010001// If this is an 'extern' declaration we defer to the canonical definition and10002// do not emit an offloading entry.10003if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&10004VD->hasExternalStorage())10005return;1000610007if (!Res) {10008if (CGM.getLangOpts().OpenMPIsTargetDevice) {10009// Register non-target variables being emitted in device code (debug info10010// may cause this).10011StringRef VarName = CGM.getMangledName(VD);10012EmittedNonTargetVariables.try_emplace(VarName, 
Addr);10013}10014return;10015}1001610017auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };10018auto LinkageForVariable = [&VD, this]() {10019return CGM.getLLVMLinkageVarDefinition(VD);10020};1002110022std::vector<llvm::GlobalVariable *> GeneratedRefs;10023OMPBuilder.registerTargetGlobalVariable(10024convertCaptureClause(VD), convertDeviceClause(VD),10025VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,10026VD->isExternallyVisible(),10027getEntryInfoFromPresumedLoc(CGM, OMPBuilder,10028VD->getCanonicalDecl()->getBeginLoc()),10029CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,10030CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,10031CGM.getTypes().ConvertTypeForMem(10032CGM.getContext().getPointerType(VD->getType())),10033Addr);1003410035for (auto *ref : GeneratedRefs)10036CGM.addCompilerUsedGlobal(ref);10037}1003810039bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {10040if (isa<FunctionDecl>(GD.getDecl()) ||10041isa<OMPDeclareReductionDecl>(GD.getDecl()))10042return emitTargetFunctions(GD);1004310044return emitTargetGlobalVariable(GD);10045}1004610047void CGOpenMPRuntime::emitDeferredTargetDecls() const {10048for (const VarDecl *VD : DeferredGlobalVariables) {10049std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =10050OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);10051if (!Res)10052continue;10053if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||10054*Res == OMPDeclareTargetDeclAttr::MT_Enter) &&10055!HasRequiresUnifiedSharedMemory) {10056CGM.EmitGlobal(VD);10057} else {10058assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||10059((*Res == OMPDeclareTargetDeclAttr::MT_To ||10060*Res == OMPDeclareTargetDeclAttr::MT_Enter) &&10061HasRequiresUnifiedSharedMemory)) &&10062"Expected link clause or to clause with unified memory.");10063(void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);10064}10065}10066}1006710068void 
CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(10069CodeGenFunction &CGF, const OMPExecutableDirective &D) const {10070assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&10071" Expected target-based directive.");10072}1007310074void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {10075for (const OMPClause *Clause : D->clauselists()) {10076if (Clause->getClauseKind() == OMPC_unified_shared_memory) {10077HasRequiresUnifiedSharedMemory = true;10078OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);10079} else if (const auto *AC =10080dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {10081switch (AC->getAtomicDefaultMemOrderKind()) {10082case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:10083RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;10084break;10085case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:10086RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;10087break;10088case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:10089RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;10090break;10091case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:10092break;10093}10094}10095}10096}1009710098llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {10099return RequiresAtomicOrdering;10100}1010110102bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,10103LangAS &AS) {10104if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())10105return false;10106const auto *A = VD->getAttr<OMPAllocateDeclAttr>();10107switch(A->getAllocatorType()) {10108case OMPAllocateDeclAttr::OMPNullMemAlloc:10109case OMPAllocateDeclAttr::OMPDefaultMemAlloc:10110// Not supported, fallback to the default mem space.10111case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:10112case OMPAllocateDeclAttr::OMPCGroupMemAlloc:10113case OMPAllocateDeclAttr::OMPHighBWMemAlloc:10114case OMPAllocateDeclAttr::OMPLowLatMemAlloc:10115case OMPAllocateDeclAttr::OMPThreadMemAlloc:10116case OMPAllocateDeclAttr::OMPConstMemAlloc:10117case 
OMPAllocateDeclAttr::OMPPTeamMemAlloc:10118AS = LangAS::Default;10119return true;10120case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:10121llvm_unreachable("Expected predefined allocator for the variables with the "10122"static storage.");10123}10124return false;10125}1012610127bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {10128return HasRequiresUnifiedSharedMemory;10129}1013010131CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(10132CodeGenModule &CGM)10133: CGM(CGM) {10134if (CGM.getLangOpts().OpenMPIsTargetDevice) {10135SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;10136CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;10137}10138}1013910140CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {10141if (CGM.getLangOpts().OpenMPIsTargetDevice)10142CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;10143}1014410145bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {10146if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)10147return true;1014810149const auto *D = cast<FunctionDecl>(GD.getDecl());10150// Do not to emit function if it is marked as declare target as it was already10151// emitted.10152if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {10153if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {10154if (auto *F = dyn_cast_or_null<llvm::Function>(10155CGM.GetGlobalValue(CGM.getMangledName(GD))))10156return !F->isDeclaration();10157return false;10158}10159return true;10160}1016110162return !AlreadyEmittedTargetDecls.insert(D).second;10163}1016410165void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,10166const OMPExecutableDirective &D,10167SourceLocation Loc,10168llvm::Function *OutlinedFn,10169ArrayRef<llvm::Value *> CapturedVars) {10170if (!CGF.HaveInsertPoint())10171return;1017210173llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);10174CodeGenFunction::RunCleanupsScope Scope(CGF);1017510176// Build 
call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);10177llvm::Value *Args[] = {10178RTLoc,10179CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars10180CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};10181llvm::SmallVector<llvm::Value *, 16> RealArgs;10182RealArgs.append(std::begin(Args), std::end(Args));10183RealArgs.append(CapturedVars.begin(), CapturedVars.end());1018410185llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(10186CGM.getModule(), OMPRTL___kmpc_fork_teams);10187CGF.EmitRuntimeCall(RTLFn, RealArgs);10188}1018910190void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,10191const Expr *NumTeams,10192const Expr *ThreadLimit,10193SourceLocation Loc) {10194if (!CGF.HaveInsertPoint())10195return;1019610197llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);1019810199llvm::Value *NumTeamsVal =10200NumTeams10201? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),10202CGF.CGM.Int32Ty, /* isSigned = */ true)10203: CGF.Builder.getInt32(0);1020410205llvm::Value *ThreadLimitVal =10206ThreadLimit10207? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),10208CGF.CGM.Int32Ty, /* isSigned = */ true)10209: CGF.Builder.getInt32(0);1021010211// Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)10212llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,10213ThreadLimitVal};10214CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(10215CGM.getModule(), OMPRTL___kmpc_push_num_teams),10216PushNumTeamsArgs);10217}1021810219void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,10220const Expr *ThreadLimit,10221SourceLocation Loc) {10222llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);10223llvm::Value *ThreadLimitVal =10224ThreadLimit10225? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),10226CGF.CGM.Int32Ty, /* isSigned = */ true)10227: CGF.Builder.getInt32(0);1022810229// Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)10230llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),10231ThreadLimitVal};10232CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(10233CGM.getModule(), OMPRTL___kmpc_set_thread_limit),10234ThreadLimitArgs);10235}1023610237void CGOpenMPRuntime::emitTargetDataCalls(10238CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,10239const Expr *Device, const RegionCodeGenTy &CodeGen,10240CGOpenMPRuntime::TargetDataInfo &Info) {10241if (!CGF.HaveInsertPoint())10242return;1024310244// Action used to replace the default codegen action and turn privatization10245// off.10246PrePostActionTy NoPrivAction;1024710248using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;1024910250llvm::Value *IfCondVal = nullptr;10251if (IfCond)10252IfCondVal = CGF.EvaluateExprAsBool(IfCond);1025310254// Emit device ID if any.10255llvm::Value *DeviceID = nullptr;10256if (Device) {10257DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),10258CGF.Int64Ty, /*isSigned=*/true);10259} else {10260DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);10261}1026210263// Fill up the arrays with all the mapped variables.10264MappableExprsHandler::MapCombinedInfoTy CombinedInfo;10265auto GenMapInfoCB =10266[&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {10267CGF.Builder.restoreIP(CodeGenIP);10268// Get map clause information.10269MappableExprsHandler MEHandler(D, CGF);10270MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);1027110272auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {10273return emitMappingInformation(CGF, OMPBuilder, MapExpr);10274};10275if (CGM.getCodeGenOpts().getDebugInfo() !=10276llvm::codegenoptions::NoDebugInfo) 
{
      // Names are only produced when debug info is enabled: one entry per
      // mapped expression.
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Emits the region body. Which variant runs depends on whether any device
  // addresses were captured: with captures the body is emitted for Priv and,
  // with privatization turned off, for DupNoPriv; without captures only for
  // NoPriv.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    // Hand the builder's current position back to the caller.
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Records the new value for the I-th mapped entry when that entry has an
  // associated device pointer declaration.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  // Returns the user-defined mapper function of the I-th entry, or null if
  // the entry has no mapper.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  // Continue code generation at the insertion point returned by the builder.
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}

/// Emits the runtime call for a standalone data directive ('target enter
/// data', 'target exit data' or 'target update').
/// \param IfCond Optional 'if' clause expression; when present the call is
/// emitted under that condition and nothing is emitted otherwise.
/// \param Device Optional 'device' clause expression; OMP_DEVICEID_UNDEF is
/// passed to the runtime when it is null.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below and read by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is rejected below.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    if (HasNowait) {
      // The nowait entry points take four additional trailing arguments
      // (i32, ptr, i32, ptr); all of them are passed as null/zero here.
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo and the
  // MapTypes/MapNames pointers, then runs ThenGen either inside a task
  // (depend/nowait clauses) or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // The else branch of the 'if' clause intentionally emits nothing.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  /// Classification of the parameter; parameters default to Vector.
  ParamKindTy Kind = Vector;
  /// Linear stride value, or an argument reference when HasVarStride is set
  /// (NOTE(review): inferred from the name and from how the mangler prints
  /// it; confirm against the code that fills it in).
  llvm::APSInt StrideOrArg;
  /// Alignment of the parameter; a zero value means none is mangled.
  llvm::APSInt Alignment;
  /// True when the stride is variable; mangled with an 's' prefix.
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause.
The VLEN value must be power of 2.10547// In other case the notion of the function`s "characteristic data type" (CDT)10548// is used to compute the vector length.10549// CDT is defined in the following order:10550// a) For non-void function, the CDT is the return type.10551// b) If the function has any non-uniform, non-linear parameters, then the10552// CDT is the type of the first such parameter.10553// c) If the CDT determined by a) or b) above is struct, union, or class10554// type which is pass-by-value (except for the type that maps to the10555// built-in complex data type), the characteristic data type is int.10556// d) If none of the above three cases is applicable, the CDT is int.10557// The VLEN is then determined based on the CDT and the size of vector10558// register of that ISA for which current vector version is generated. The10559// VLEN is computed using the formula below:10560// VLEN = sizeof(vector_register) / sizeof(CDT),10561// where vector register size specified in section 3.2.1 Registers and the10562// Stack Frame of original AMD64 ABI document.10563QualType RetType = FD->getReturnType();10564if (RetType.isNull())10565return 0;10566ASTContext &C = FD->getASTContext();10567QualType CDT;10568if (!RetType.isNull() && !RetType->isVoidType()) {10569CDT = RetType;10570} else {10571unsigned Offset = 0;10572if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {10573if (ParamAttrs[Offset].Kind == Vector)10574CDT = C.getPointerType(C.getRecordType(MD->getParent()));10575++Offset;10576}10577if (CDT.isNull()) {10578for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {10579if (ParamAttrs[I + Offset].Kind == Vector) {10580CDT = FD->getParamDecl(I)->getType();10581break;10582}10583}10584}10585}10586if (CDT.isNull())10587CDT = C.IntTy;10588CDT = CDT->getCanonicalTypeUnqualified();10589if (CDT->isRecordType() || CDT->isUnionType())10590CDT = C.IntTy;10591return C.getTypeSize(CDT);10592}1059310594/// Mangle the parameter part of the vector function name 
according to10595/// their OpenMP classification. The mangling function is defined in10596/// section 4.5 of the AAVFABI(2021Q1).10597static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {10598SmallString<256> Buffer;10599llvm::raw_svector_ostream Out(Buffer);10600for (const auto &ParamAttr : ParamAttrs) {10601switch (ParamAttr.Kind) {10602case Linear:10603Out << 'l';10604break;10605case LinearRef:10606Out << 'R';10607break;10608case LinearUVal:10609Out << 'U';10610break;10611case LinearVal:10612Out << 'L';10613break;10614case Uniform:10615Out << 'u';10616break;10617case Vector:10618Out << 'v';10619break;10620}10621if (ParamAttr.HasVarStride)10622Out << "s" << ParamAttr.StrideOrArg;10623else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||10624ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {10625// Don't print the step value if it is not present or if it is10626// equal to 1.10627if (ParamAttr.StrideOrArg < 0)10628Out << 'n' << -ParamAttr.StrideOrArg;10629else if (ParamAttr.StrideOrArg != 1)10630Out << ParamAttr.StrideOrArg;10631}1063210633if (!!ParamAttr.Alignment)10634Out << 'a' << ParamAttr.Alignment;10635}1063610637return std::string(Out.str());10638}1063910640static void10641emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,10642const llvm::APSInt &VLENVal,10643ArrayRef<ParamAttrTy> ParamAttrs,10644OMPDeclareSimdDeclAttr::BranchStateTy State) {10645struct ISADataTy {10646char ISA;10647unsigned VecRegSize;10648};10649ISADataTy ISAData[] = {10650{10651'b', 12810652}, // SSE10653{10654'c', 25610655}, // AVX10656{10657'd', 25610658}, // AVX210659{10660'e', 51210661}, // AVX51210662};10663llvm::SmallVector<char, 2> Masked;10664switch (State) {10665case OMPDeclareSimdDeclAttr::BS_Undefined:10666Masked.push_back('N');10667Masked.push_back('M');10668break;10669case OMPDeclareSimdDeclAttr::BS_Notinbranch:10670Masked.push_back('N');10671break;10672case 
OMPDeclareSimdDeclAttr::BS_Inbranch:10673Masked.push_back('M');10674break;10675}10676for (char Mask : Masked) {10677for (const ISADataTy &Data : ISAData) {10678SmallString<256> Buffer;10679llvm::raw_svector_ostream Out(Buffer);10680Out << "_ZGV" << Data.ISA << Mask;10681if (!VLENVal) {10682unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);10683assert(NumElts && "Non-zero simdlen/cdtsize expected");10684Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);10685} else {10686Out << VLENVal;10687}10688Out << mangleVectorParameters(ParamAttrs);10689Out << '_' << Fn->getName();10690Fn->addFnAttr(Out.str());10691}10692}10693}1069410695// This are the Functions that are needed to mangle the name of the10696// vector functions generated by the compiler, according to the rules10697// defined in the "Vector Function ABI specifications for AArch64",10698// available at10699// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.1070010701/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).10702static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {10703QT = QT.getCanonicalType();1070410705if (QT->isVoidType())10706return false;1070710708if (Kind == ParamKindTy::Uniform)10709return false;1071010711if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)10712return false;1071310714if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&10715!QT->isReferenceType())10716return false;1071710718return true;10719}1072010721/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.10722static bool getAArch64PBV(QualType QT, ASTContext &C) {10723QT = QT.getCanonicalType();10724unsigned Size = C.getTypeSize(QT);1072510726// Only scalars and complex within 16 bytes wide set PVB to true.10727if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)10728return false;1072910730if (QT->isFloatingType())10731return true;1073210733if (QT->isIntegerType())10734return 
true;1073510736if (QT->isPointerType())10737return true;1073810739// TODO: Add support for complex types (section 3.1.2, item 2).1074010741return false;10742}1074310744/// Computes the lane size (LS) of a return type or of an input parameter,10745/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.10746/// TODO: Add support for references, section 3.2.1, item 1.10747static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {10748if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {10749QualType PTy = QT.getCanonicalType()->getPointeeType();10750if (getAArch64PBV(PTy, C))10751return C.getTypeSize(PTy);10752}10753if (getAArch64PBV(QT, C))10754return C.getTypeSize(QT);1075510756return C.getTypeSize(C.getUIntPtrType());10757}1075810759// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the10760// signature of the scalar function, as defined in 3.2.2 of the10761// AAVFABI.10762static std::tuple<unsigned, unsigned, bool>10763getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {10764QualType RetType = FD->getReturnType().getCanonicalType();1076510766ASTContext &C = FD->getASTContext();1076710768bool OutputBecomesInput = false;1076910770llvm::SmallVector<unsigned, 8> Sizes;10771if (!RetType->isVoidType()) {10772Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));10773if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))10774OutputBecomesInput = true;10775}10776for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {10777QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();10778Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));10779}1078010781assert(!Sizes.empty() && "Unable to determine NDS and WDS.");10782// The LS of a function parameter / return value can only be a power10783// of 2, starting from 8 bits, up to 128.10784assert(llvm::all_of(Sizes,10785[](unsigned Size) {10786return Size == 8 || Size == 16 || Size == 32 ||10787Size == 64 || Size == 128;10788}) 
&&10789"Invalid size");1079010791return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),10792*std::max_element(std::begin(Sizes), std::end(Sizes)),10793OutputBecomesInput);10794}1079510796// Function used to add the attribute. The parameter `VLEN` is10797// templated to allow the use of "x" when targeting scalable functions10798// for SVE.10799template <typename T>10800static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,10801char ISA, StringRef ParSeq,10802StringRef MangledName, bool OutputBecomesInput,10803llvm::Function *Fn) {10804SmallString<256> Buffer;10805llvm::raw_svector_ostream Out(Buffer);10806Out << Prefix << ISA << LMask << VLEN;10807if (OutputBecomesInput)10808Out << "v";10809Out << ParSeq << "_" << MangledName;10810Fn->addFnAttr(Out.str());10811}1081210813// Helper function to generate the Advanced SIMD names depending on10814// the value of the NDS when simdlen is not present.10815static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,10816StringRef Prefix, char ISA,10817StringRef ParSeq, StringRef MangledName,10818bool OutputBecomesInput,10819llvm::Function *Fn) {10820switch (NDS) {10821case 8:10822addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,10823OutputBecomesInput, Fn);10824addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,10825OutputBecomesInput, Fn);10826break;10827case 16:10828addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,10829OutputBecomesInput, Fn);10830addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,10831OutputBecomesInput, Fn);10832break;10833case 32:10834addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,10835OutputBecomesInput, Fn);10836addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,10837OutputBecomesInput, Fn);10838break;10839case 64:10840case 128:10841addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,10842OutputBecomesInput, 
Fn);10843break;10844default:10845llvm_unreachable("Scalar type is too wide.");10846}10847}1084810849/// Emit vector function attributes for AArch64, as defined in the AAVFABI.10850static void emitAArch64DeclareSimdFunction(10851CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,10852ArrayRef<ParamAttrTy> ParamAttrs,10853OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,10854char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {1085510856// Get basic data for building the vector signature.10857const auto Data = getNDSWDS(FD, ParamAttrs);10858const unsigned NDS = std::get<0>(Data);10859const unsigned WDS = std::get<1>(Data);10860const bool OutputBecomesInput = std::get<2>(Data);1086110862// Check the values provided via `simdlen` by the user.10863// 1. A `simdlen(1)` doesn't produce vector signatures,10864if (UserVLEN == 1) {10865unsigned DiagID = CGM.getDiags().getCustomDiagID(10866DiagnosticsEngine::Warning,10867"The clause simdlen(1) has no effect when targeting aarch64.");10868CGM.getDiags().Report(SLoc, DiagID);10869return;10870}1087110872// 2. Section 3.3.1, item 1: user input must be a power of 2 for10873// Advanced SIMD output.10874if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {10875unsigned DiagID = CGM.getDiags().getCustomDiagID(10876DiagnosticsEngine::Warning, "The value specified in simdlen must be a "10877"power of 2 when targeting Advanced SIMD.");10878CGM.getDiags().Report(SLoc, DiagID);10879return;10880}1088110882// 3. Section 3.4.1. 
SVE fixed lengh must obey the architectural10883// limits.10884if (ISA == 's' && UserVLEN != 0) {10885if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {10886unsigned DiagID = CGM.getDiags().getCustomDiagID(10887DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "10888"lanes in the architectural constraints "10889"for SVE (min is 128-bit, max is "10890"2048-bit, by steps of 128-bit)");10891CGM.getDiags().Report(SLoc, DiagID) << WDS;10892return;10893}10894}1089510896// Sort out parameter sequence.10897const std::string ParSeq = mangleVectorParameters(ParamAttrs);10898StringRef Prefix = "_ZGV";10899// Generate simdlen from user input (if any).10900if (UserVLEN) {10901if (ISA == 's') {10902// SVE generates only a masked function.10903addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,10904OutputBecomesInput, Fn);10905} else {10906assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");10907// Advanced SIMD generates one or two functions, depending on10908// the `[not]inbranch` clause.10909switch (State) {10910case OMPDeclareSimdDeclAttr::BS_Undefined:10911addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,10912OutputBecomesInput, Fn);10913addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,10914OutputBecomesInput, Fn);10915break;10916case OMPDeclareSimdDeclAttr::BS_Notinbranch:10917addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,10918OutputBecomesInput, Fn);10919break;10920case OMPDeclareSimdDeclAttr::BS_Inbranch:10921addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,10922OutputBecomesInput, Fn);10923break;10924}10925}10926} else {10927// If no user simdlen is provided, follow the AAVFABI rules for10928// generating the vector length.10929if (ISA == 's') {10930// SVE, section 3.4.1, item 1.10931addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,10932OutputBecomesInput, Fn);10933} else {10934assert(ISA == 'n' && "Expected ISA either 's' 
or 'n'.");10935// Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or10936// two vector names depending on the use of the clause10937// `[not]inbranch`.10938switch (State) {10939case OMPDeclareSimdDeclAttr::BS_Undefined:10940addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,10941OutputBecomesInput, Fn);10942addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,10943OutputBecomesInput, Fn);10944break;10945case OMPDeclareSimdDeclAttr::BS_Notinbranch:10946addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,10947OutputBecomesInput, Fn);10948break;10949case OMPDeclareSimdDeclAttr::BS_Inbranch:10950addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,10951OutputBecomesInput, Fn);10952break;10953}10954}10955}10956}1095710958void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,10959llvm::Function *Fn) {10960ASTContext &C = CGM.getContext();10961FD = FD->getMostRecentDecl();10962while (FD) {10963// Map params to their positions in function decl.10964llvm::DenseMap<const Decl *, unsigned> ParamPositions;10965if (isa<CXXMethodDecl>(FD))10966ParamPositions.try_emplace(FD, 0);10967unsigned ParamPos = ParamPositions.size();10968for (const ParmVarDecl *P : FD->parameters()) {10969ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);10970++ParamPos;10971}10972for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {10973llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());10974// Mark uniform parameters.10975for (const Expr *E : Attr->uniforms()) {10976E = E->IgnoreParenImpCasts();10977unsigned Pos;10978if (isa<CXXThisExpr>(E)) {10979Pos = ParamPositions[FD];10980} else {10981const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())10982->getCanonicalDecl();10983auto It = ParamPositions.find(PVD);10984assert(It != ParamPositions.end() && "Function parameter not found");10985Pos = It->second;10986}10987ParamAttrs[Pos].Kind = 
Uniform;10988}10989// Get alignment info.10990auto *NI = Attr->alignments_begin();10991for (const Expr *E : Attr->aligneds()) {10992E = E->IgnoreParenImpCasts();10993unsigned Pos;10994QualType ParmTy;10995if (isa<CXXThisExpr>(E)) {10996Pos = ParamPositions[FD];10997ParmTy = E->getType();10998} else {10999const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())11000->getCanonicalDecl();11001auto It = ParamPositions.find(PVD);11002assert(It != ParamPositions.end() && "Function parameter not found");11003Pos = It->second;11004ParmTy = PVD->getType();11005}11006ParamAttrs[Pos].Alignment =11007(*NI)11008? (*NI)->EvaluateKnownConstInt(C)11009: llvm::APSInt::getUnsigned(11010C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))11011.getQuantity());11012++NI;11013}11014// Mark linear parameters.11015auto *SI = Attr->steps_begin();11016auto *MI = Attr->modifiers_begin();11017for (const Expr *E : Attr->linears()) {11018E = E->IgnoreParenImpCasts();11019unsigned Pos;11020bool IsReferenceType = false;11021// Rescaling factor needed to compute the linear parameter11022// value in the mangled name.11023unsigned PtrRescalingFactor = 1;11024if (isa<CXXThisExpr>(E)) {11025Pos = ParamPositions[FD];11026auto *P = cast<PointerType>(E->getType());11027PtrRescalingFactor = CGM.getContext()11028.getTypeSizeInChars(P->getPointeeType())11029.getQuantity();11030} else {11031const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())11032->getCanonicalDecl();11033auto It = ParamPositions.find(PVD);11034assert(It != ParamPositions.end() && "Function parameter not found");11035Pos = It->second;11036if (auto *P = dyn_cast<PointerType>(PVD->getType()))11037PtrRescalingFactor = CGM.getContext()11038.getTypeSizeInChars(P->getPointeeType())11039.getQuantity();11040else if (PVD->getType()->isReferenceType()) {11041IsReferenceType = true;11042PtrRescalingFactor 
=11043CGM.getContext()11044.getTypeSizeInChars(PVD->getType().getNonReferenceType())11045.getQuantity();11046}11047}11048ParamAttrTy &ParamAttr = ParamAttrs[Pos];11049if (*MI == OMPC_LINEAR_ref)11050ParamAttr.Kind = LinearRef;11051else if (*MI == OMPC_LINEAR_uval)11052ParamAttr.Kind = LinearUVal;11053else if (IsReferenceType)11054ParamAttr.Kind = LinearVal;11055else11056ParamAttr.Kind = Linear;11057// Assuming a stride of 1, for `linear` without modifiers.11058ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);11059if (*SI) {11060Expr::EvalResult Result;11061if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {11062if (const auto *DRE =11063cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {11064if (const auto *StridePVD =11065dyn_cast<ParmVarDecl>(DRE->getDecl())) {11066ParamAttr.HasVarStride = true;11067auto It = ParamPositions.find(StridePVD->getCanonicalDecl());11068assert(It != ParamPositions.end() &&11069"Function parameter not found");11070ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);11071}11072}11073} else {11074ParamAttr.StrideOrArg = Result.Val.getInt();11075}11076}11077// If we are using a linear clause on a pointer, we need to11078// rescale the value of linear_step with the byte size of the11079// pointee type.11080if (!ParamAttr.HasVarStride &&11081(ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))11082ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;11083++SI;11084++MI;11085}11086llvm::APSInt VLENVal;11087SourceLocation ExprLoc;11088const Expr *VLENExpr = Attr->getSimdlen();11089if (VLENExpr) {11090VLENVal = VLENExpr->EvaluateKnownConstInt(C);11091ExprLoc = VLENExpr->getExprLoc();11092}11093OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();11094if (CGM.getTriple().isX86()) {11095emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);11096} else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {11097unsigned VLEN = VLENVal.getExtValue();11098StringRef 
MangledName = Fn->getName();11099if (CGM.getTarget().hasFeature("sve"))11100emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,11101MangledName, 's', 128, Fn, ExprLoc);11102else if (CGM.getTarget().hasFeature("neon"))11103emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,11104MangledName, 'n', 128, Fn, ExprLoc);11105}11106}11107FD = FD->getPreviousDecl();11108}11109}1111011111namespace {11112/// Cleanup action for doacross support.11113class DoacrossCleanupTy final : public EHScopeStack::Cleanup {11114public:11115static const int DoacrossFinArgs = 2;1111611117private:11118llvm::FunctionCallee RTLFn;11119llvm::Value *Args[DoacrossFinArgs];1112011121public:11122DoacrossCleanupTy(llvm::FunctionCallee RTLFn,11123ArrayRef<llvm::Value *> CallArgs)11124: RTLFn(RTLFn) {11125assert(CallArgs.size() == DoacrossFinArgs);11126std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));11127}11128void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {11129if (!CGF.HaveInsertPoint())11130return;11131CGF.EmitRuntimeCall(RTLFn, Args);11132}11133};11134} // namespace1113511136void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,11137const OMPLoopDirective &D,11138ArrayRef<Expr *> NumIterations) {11139if (!CGF.HaveInsertPoint())11140return;1114111142ASTContext &C = CGM.getContext();11143QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);11144RecordDecl *RD;11145if (KmpDimTy.isNull()) {11146// Build struct kmp_dim { // loop bounds info casted to kmp_int6411147// kmp_int64 lo; // lower11148// kmp_int64 up; // upper11149// kmp_int64 st; // stride11150// };11151RD = C.buildImplicitRecord("kmp_dim");11152RD->startDefinition();11153addFieldToRecordDecl(C, RD, Int64Ty);11154addFieldToRecordDecl(C, RD, Int64Ty);11155addFieldToRecordDecl(C, RD, Int64Ty);11156RD->completeDefinition();11157KmpDimTy = C.getRecordType(RD);11158} else {11159RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());11160}11161llvm::APInt Size(/*numBits=*/32, 
NumIterations.size());11162QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,11163ArraySizeModifier::Normal, 0);1116411165Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");11166CGF.EmitNullInitialization(DimsAddr, ArrayTy);11167enum { LowerFD = 0, UpperFD, StrideFD };11168// Fill dims with data.11169for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {11170LValue DimsLVal = CGF.MakeAddrLValue(11171CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);11172// dims.upper = num_iterations;11173LValue UpperLVal = CGF.EmitLValueForField(11174DimsLVal, *std::next(RD->field_begin(), UpperFD));11175llvm::Value *NumIterVal = CGF.EmitScalarConversion(11176CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),11177Int64Ty, NumIterations[I]->getExprLoc());11178CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);11179// dims.stride = 1;11180LValue StrideLVal = CGF.EmitLValueForField(11181DimsLVal, *std::next(RD->field_begin(), StrideFD));11182CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),11183StrideLVal);11184}1118511186// Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,11187// kmp_int32 num_dims, struct kmp_dim * dims);11188llvm::Value *Args[] = {11189emitUpdateLocation(CGF, D.getBeginLoc()),11190getThreadID(CGF, D.getBeginLoc()),11191llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),11192CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(11193CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),11194CGM.VoidPtrTy)};1119511196llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(11197CGM.getModule(), OMPRTL___kmpc_doacross_init);11198CGF.EmitRuntimeCall(RTLFn, Args);11199llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {11200emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};11201llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(11202CGM.getModule(), 
OMPRTL___kmpc_doacross_fini);11203CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,11204llvm::ArrayRef(FiniArgs));11205}1120611207template <typename T>11208static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,11209const T *C, llvm::Value *ULoc,11210llvm::Value *ThreadID) {11211QualType Int64Ty =11212CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);11213llvm::APInt Size(/*numBits=*/32, C->getNumLoops());11214QualType ArrayTy = CGM.getContext().getConstantArrayType(11215Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);11216Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");11217for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {11218const Expr *CounterVal = C->getLoopData(I);11219assert(CounterVal);11220llvm::Value *CntVal = CGF.EmitScalarConversion(11221CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,11222CounterVal->getExprLoc());11223CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),11224/*Volatile=*/false, Int64Ty);11225}11226llvm::Value *Args[] = {11227ULoc, ThreadID,11228CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};11229llvm::FunctionCallee RTLFn;11230llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();11231OMPDoacrossKind<T> ODK;11232if (ODK.isSource(C)) {11233RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),11234OMPRTL___kmpc_doacross_post);11235} else {11236assert(ODK.isSink(C) && "Expect sink modifier.");11237RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),11238OMPRTL___kmpc_doacross_wait);11239}11240CGF.EmitRuntimeCall(RTLFn, Args);11241}1124211243void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,11244const OMPDependClause *C) {11245return EmitDoacrossOrdered<OMPDependClause>(11246CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),11247getThreadID(CGF, C->getBeginLoc()));11248}1124911250void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction 
&CGF,11251const OMPDoacrossClause *C) {11252return EmitDoacrossOrdered<OMPDoacrossClause>(11253CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),11254getThreadID(CGF, C->getBeginLoc()));11255}1125611257void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,11258llvm::FunctionCallee Callee,11259ArrayRef<llvm::Value *> Args) const {11260assert(Loc.isValid() && "Outlined function call location must be valid.");11261auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);1126211263if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {11264if (Fn->doesNotThrow()) {11265CGF.EmitNounwindRuntimeCall(Fn, Args);11266return;11267}11268}11269CGF.EmitRuntimeCall(Callee, Args);11270}1127111272void CGOpenMPRuntime::emitOutlinedFunctionCall(11273CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,11274ArrayRef<llvm::Value *> Args) const {11275emitCall(CGF, Loc, OutlinedFn, Args);11276}1127711278void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {11279if (const auto *FD = dyn_cast<FunctionDecl>(D))11280if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))11281HasEmittedDeclareTargetRegion = true;11282}1128311284Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,11285const VarDecl *NativeParam,11286const VarDecl *TargetParam) const {11287return CGF.GetAddrOfLocalVar(NativeParam);11288}1128911290/// Return allocator value from expression, or return a null allocator (default11291/// when no allocator specified).11292static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,11293const Expr *Allocator) {11294llvm::Value *AllocVal;11295if (Allocator) {11296AllocVal = CGF.EmitScalarExpr(Allocator);11297// According to the standard, the original allocator type is a enum11298// (integer). 
Convert to pointer type, if required.11299AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),11300CGF.getContext().VoidPtrTy,11301Allocator->getExprLoc());11302} else {11303// If no allocator specified, it defaults to the null allocator.11304AllocVal = llvm::Constant::getNullValue(11305CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));11306}11307return AllocVal;11308}1130911310/// Return the alignment from an allocate directive if present.11311static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {11312std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);1131311314if (!AllocateAlignment)11315return nullptr;1131611317return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());11318}1131911320Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,11321const VarDecl *VD) {11322if (!VD)11323return Address::invalid();11324Address UntiedAddr = Address::invalid();11325Address UntiedRealAddr = Address::invalid();11326auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);11327if (It != FunctionToUntiedTaskStackMap.end()) {11328const UntiedLocalVarsAddressesMap &UntiedData =11329UntiedLocalVarsStack[It->second];11330auto I = UntiedData.find(VD);11331if (I != UntiedData.end()) {11332UntiedAddr = I->second.first;11333UntiedRealAddr = I->second.second;11334}11335}11336const VarDecl *CVD = VD->getCanonicalDecl();11337if (CVD->hasAttr<OMPAllocateDeclAttr>()) {11338// Use the default allocation.11339if (!isAllocatableDecl(VD))11340return UntiedAddr;11341llvm::Value *Size;11342CharUnits Align = CGM.getContext().getDeclAlign(CVD);11343if (CVD->getType()->isVariablyModifiedType()) {11344Size = CGF.getTypeSize(CVD->getType());11345// Align the size: ((size + align - 1) / align) * align11346Size = CGF.Builder.CreateNUWAdd(11347Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));11348Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));11349Size = 
CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));11350} else {11351CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());11352Size = CGM.getSize(Sz.alignTo(Align));11353}11354llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());11355const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();11356const Expr *Allocator = AA->getAllocator();11357llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);11358llvm::Value *Alignment = getAlignmentValue(CGM, CVD);11359SmallVector<llvm::Value *, 4> Args;11360Args.push_back(ThreadID);11361if (Alignment)11362Args.push_back(Alignment);11363Args.push_back(Size);11364Args.push_back(AllocVal);11365llvm::omp::RuntimeFunction FnID =11366Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;11367llvm::Value *Addr = CGF.EmitRuntimeCall(11368OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,11369getName({CVD->getName(), ".void.addr"}));11370llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(11371CGM.getModule(), OMPRTL___kmpc_free);11372QualType Ty = CGM.getContext().getPointerType(CVD->getType());11373Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(11374Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));11375if (UntiedAddr.isValid())11376CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);1137711378// Cleanup action for allocate support.11379class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {11380llvm::FunctionCallee RTLFn;11381SourceLocation::UIntTy LocEncoding;11382Address Addr;11383const Expr *AllocExpr;1138411385public:11386OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,11387SourceLocation::UIntTy LocEncoding, Address Addr,11388const Expr *AllocExpr)11389: RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),11390AllocExpr(AllocExpr) {}11391void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {11392if (!CGF.HaveInsertPoint())11393return;11394llvm::Value *Args[3];11395Args[0] = 
CGF.CGM.getOpenMPRuntime().getThreadID(11396CGF, SourceLocation::getFromRawEncoding(LocEncoding));11397Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(11398Addr.emitRawPointer(CGF), CGF.VoidPtrTy);11399llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);11400Args[2] = AllocVal;11401CGF.EmitRuntimeCall(RTLFn, Args);11402}11403};11404Address VDAddr =11405UntiedRealAddr.isValid()11406? UntiedRealAddr11407: Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);11408CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(11409NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),11410VDAddr, Allocator);11411if (UntiedRealAddr.isValid())11412if (auto *Region =11413dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))11414Region->emitUntiedSwitch(CGF);11415return VDAddr;11416}11417return UntiedAddr;11418}1141911420bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,11421const VarDecl *VD) const {11422auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);11423if (It == FunctionToUntiedTaskStackMap.end())11424return false;11425return UntiedLocalVarsStack[It->second].count(VD) > 0;11426}1142711428CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(11429CodeGenModule &CGM, const OMPLoopDirective &S)11430: CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {11431assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");11432if (!NeedToPush)11433return;11434NontemporalDeclsSet &DS =11435CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();11436for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {11437for (const Stmt *Ref : C->private_refs()) {11438const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();11439const ValueDecl *VD;11440if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {11441VD = DRE->getDecl();11442} else {11443const auto *ME = cast<MemberExpr>(SimpleRefExpr);11444assert((ME->isImplicitCXXThis() ||11445isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) 
&&11446"Expected member of current class.");11447VD = ME->getMemberDecl();11448}11449DS.insert(VD);11450}11451}11452}1145311454CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {11455if (!NeedToPush)11456return;11457CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();11458}1145911460CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(11461CodeGenFunction &CGF,11462const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,11463std::pair<Address, Address>> &LocalVars)11464: CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {11465if (!NeedToPush)11466return;11467CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(11468CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());11469CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);11470}1147111472CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {11473if (!NeedToPush)11474return;11475CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();11476}1147711478bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {11479assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");1148011481return llvm::any_of(11482CGM.getOpenMPRuntime().NontemporalDeclsStack,11483[VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });11484}1148511486void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(11487const OMPExecutableDirective &S,11488llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)11489const {11490llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;11491// Vars in target/task regions must be excluded completely.11492if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||11493isOpenMPTaskingDirective(S.getDirectiveKind())) {11494SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;11495getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());11496const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());11497for (const CapturedStmt::Capture &Cap : CS->captures()) 
{11498if (Cap.capturesVariable() || Cap.capturesVariableByCopy())11499NeedToCheckForLPCs.insert(Cap.getCapturedVar());11500}11501}11502// Exclude vars in private clauses.11503for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {11504for (const Expr *Ref : C->varlists()) {11505if (!Ref->getType()->isScalarType())11506continue;11507const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());11508if (!DRE)11509continue;11510NeedToCheckForLPCs.insert(DRE->getDecl());11511}11512}11513for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {11514for (const Expr *Ref : C->varlists()) {11515if (!Ref->getType()->isScalarType())11516continue;11517const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());11518if (!DRE)11519continue;11520NeedToCheckForLPCs.insert(DRE->getDecl());11521}11522}11523for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {11524for (const Expr *Ref : C->varlists()) {11525if (!Ref->getType()->isScalarType())11526continue;11527const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());11528if (!DRE)11529continue;11530NeedToCheckForLPCs.insert(DRE->getDecl());11531}11532}11533for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {11534for (const Expr *Ref : C->varlists()) {11535if (!Ref->getType()->isScalarType())11536continue;11537const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());11538if (!DRE)11539continue;11540NeedToCheckForLPCs.insert(DRE->getDecl());11541}11542}11543for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {11544for (const Expr *Ref : C->varlists()) {11545if (!Ref->getType()->isScalarType())11546continue;11547const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());11548if (!DRE)11549continue;11550NeedToCheckForLPCs.insert(DRE->getDecl());11551}11552}11553for (const Decl *VD : NeedToCheckForLPCs) {11554for (const LastprivateConditionalData &Data :11555llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {11556if 
(Data.DeclToUniqueName.count(VD) > 0) {11557if (!Data.Disabled)11558NeedToAddForLPCsAsDisabled.insert(VD);11559break;11560}11561}11562}11563}1156411565CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(11566CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)11567: CGM(CGF.CGM),11568Action((CGM.getLangOpts().OpenMP >= 50 &&11569llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),11570[](const OMPLastprivateClause *C) {11571return C->getKind() ==11572OMPC_LASTPRIVATE_conditional;11573}))11574? ActionToDo::PushAsLastprivateConditional11575: ActionToDo::DoNotPush) {11576assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");11577if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)11578return;11579assert(Action == ActionToDo::PushAsLastprivateConditional &&11580"Expected a push action.");11581LastprivateConditionalData &Data =11582CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();11583for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {11584if (C->getKind() != OMPC_LASTPRIVATE_conditional)11585continue;1158611587for (const Expr *Ref : C->varlists()) {11588Data.DeclToUniqueName.insert(std::make_pair(11589cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),11590SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));11591}11592}11593Data.IVLVal = IVLVal;11594Data.Fn = CGF.CurFn;11595}1159611597CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(11598CodeGenFunction &CGF, const OMPExecutableDirective &S)11599: CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {11600assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");11601if (CGM.getLangOpts().OpenMP < 50)11602return;11603llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;11604tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);11605if (!NeedToAddForLPCsAsDisabled.empty()) {11606Action = ActionToDo::DisableLastprivateConditional;11607LastprivateConditionalData &Data 
=11608CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();11609for (const Decl *VD : NeedToAddForLPCsAsDisabled)11610Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));11611Data.Fn = CGF.CurFn;11612Data.Disabled = true;11613}11614}1161511616CGOpenMPRuntime::LastprivateConditionalRAII11617CGOpenMPRuntime::LastprivateConditionalRAII::disable(11618CodeGenFunction &CGF, const OMPExecutableDirective &S) {11619return LastprivateConditionalRAII(CGF, S);11620}1162111622CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {11623if (CGM.getLangOpts().OpenMP < 50)11624return;11625if (Action == ActionToDo::DisableLastprivateConditional) {11626assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&11627"Expected list of disabled private vars.");11628CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();11629}11630if (Action == ActionToDo::PushAsLastprivateConditional) {11631assert(11632!CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&11633"Expected list of lastprivate conditional vars.");11634CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();11635}11636}1163711638Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,11639const VarDecl *VD) {11640ASTContext &C = CGM.getContext();11641auto I = LastprivateConditionalToTypes.find(CGF.CurFn);11642if (I == LastprivateConditionalToTypes.end())11643I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;11644QualType NewType;11645const FieldDecl *VDField;11646const FieldDecl *FiredField;11647LValue BaseLVal;11648auto VI = I->getSecond().find(VD);11649if (VI == I->getSecond().end()) {11650RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");11651RD->startDefinition();11652VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());11653FiredField = addFieldToRecordDecl(C, RD, C.CharTy);11654RD->completeDefinition();11655NewType = C.getRecordType(RD);11656Address Addr 
= CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());11657BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);11658I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);11659} else {11660NewType = std::get<0>(VI->getSecond());11661VDField = std::get<1>(VI->getSecond());11662FiredField = std::get<2>(VI->getSecond());11663BaseLVal = std::get<3>(VI->getSecond());11664}11665LValue FiredLVal =11666CGF.EmitLValueForField(BaseLVal, FiredField);11667CGF.EmitStoreOfScalar(11668llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),11669FiredLVal);11670return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();11671}1167211673namespace {11674/// Checks if the lastprivate conditional variable is referenced in LHS.11675class LastprivateConditionalRefChecker final11676: public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {11677ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;11678const Expr *FoundE = nullptr;11679const Decl *FoundD = nullptr;11680StringRef UniqueDeclName;11681LValue IVLVal;11682llvm::Function *FoundFn = nullptr;11683SourceLocation Loc;1168411685public:11686bool VisitDeclRefExpr(const DeclRefExpr *E) {11687for (const CGOpenMPRuntime::LastprivateConditionalData &D :11688llvm::reverse(LPM)) {11689auto It = D.DeclToUniqueName.find(E->getDecl());11690if (It == D.DeclToUniqueName.end())11691continue;11692if (D.Disabled)11693return false;11694FoundE = E;11695FoundD = E->getDecl()->getCanonicalDecl();11696UniqueDeclName = It->second;11697IVLVal = D.IVLVal;11698FoundFn = D.Fn;11699break;11700}11701return FoundE == E;11702}11703bool VisitMemberExpr(const MemberExpr *E) {11704if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))11705return false;11706for (const CGOpenMPRuntime::LastprivateConditionalData &D :11707llvm::reverse(LPM)) {11708auto It = D.DeclToUniqueName.find(E->getMemberDecl());11709if (It == D.DeclToUniqueName.end())11710continue;11711if (D.Disabled)11712return false;11713FoundE = 
E;11714FoundD = E->getMemberDecl()->getCanonicalDecl();11715UniqueDeclName = It->second;11716IVLVal = D.IVLVal;11717FoundFn = D.Fn;11718break;11719}11720return FoundE == E;11721}11722bool VisitStmt(const Stmt *S) {11723for (const Stmt *Child : S->children()) {11724if (!Child)11725continue;11726if (const auto *E = dyn_cast<Expr>(Child))11727if (!E->isGLValue())11728continue;11729if (Visit(Child))11730return true;11731}11732return false;11733}11734explicit LastprivateConditionalRefChecker(11735ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)11736: LPM(LPM) {}11737std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>11738getFoundData() const {11739return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);11740}11741};11742} // namespace1174311744void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,11745LValue IVLVal,11746StringRef UniqueDeclName,11747LValue LVal,11748SourceLocation Loc) {11749// Last updated loop counter for the lastprivate conditional var.11750// int<xx> last_iv = 0;11751llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());11752llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(11753LLIVTy, getName({UniqueDeclName, "iv"}));11754cast<llvm::GlobalVariable>(LastIV)->setAlignment(11755IVLVal.getAlignment().getAsAlign());11756LValue LastIVLVal =11757CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());1175811759// Last value of the lastprivate conditional.11760// decltype(priv_a) last_a;11761llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(11762CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);11763cast<llvm::GlobalVariable>(Last)->setAlignment(11764LVal.getAlignment().getAsAlign());11765LValue LastLVal =11766CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());1176711768// Global loop counter. 
Required to handle inner parallel-for regions.11769// iv11770llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);1177111772// #pragma omp critical(a)11773// if (last_iv <= iv) {11774// last_iv = iv;11775// last_a = priv_a;11776// }11777auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,11778Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {11779Action.Enter(CGF);11780llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);11781// (last_iv <= iv) ? Check if the variable is updated and store new11782// value in global var.11783llvm::Value *CmpRes;11784if (IVLVal.getType()->isSignedIntegerType()) {11785CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);11786} else {11787assert(IVLVal.getType()->isUnsignedIntegerType() &&11788"Loop iteration variable must be integer.");11789CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);11790}11791llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");11792llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");11793CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);11794// {11795CGF.EmitBlock(ThenBB);1179611797// last_iv = iv;11798CGF.EmitStoreOfScalar(IVVal, LastIVLVal);1179911800// last_a = priv_a;11801switch (CGF.getEvaluationKind(LVal.getType())) {11802case TEK_Scalar: {11803llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);11804CGF.EmitStoreOfScalar(PrivVal, LastLVal);11805break;11806}11807case TEK_Complex: {11808CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);11809CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);11810break;11811}11812case TEK_Aggregate:11813llvm_unreachable(11814"Aggregates are not supported in lastprivate conditional.");11815}11816// }11817CGF.EmitBranch(ExitBB);11818// There is no need to emit line number for unconditional branch.11819(void)ApplyDebugLocation::CreateEmpty(CGF);11820CGF.EmitBlock(ExitBB, /*IsFinished=*/true);11821};1182211823if (CGM.getLangOpts().OpenMPSimd) {11824// Do not emit as a critical region as no 
parallel region could be emitted.11825RegionCodeGenTy ThenRCG(CodeGen);11826ThenRCG(CGF);11827} else {11828emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);11829}11830}1183111832void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,11833const Expr *LHS) {11834if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())11835return;11836LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);11837if (!Checker.Visit(LHS))11838return;11839const Expr *FoundE;11840const Decl *FoundD;11841StringRef UniqueDeclName;11842LValue IVLVal;11843llvm::Function *FoundFn;11844std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =11845Checker.getFoundData();11846if (FoundFn != CGF.CurFn) {11847// Special codegen for inner parallel regions.11848// ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;11849auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);11850assert(It != LastprivateConditionalToTypes[FoundFn].end() &&11851"Lastprivate conditional is not found in outer region.");11852QualType StructTy = std::get<0>(It->getSecond());11853const FieldDecl* FiredDecl = std::get<2>(It->getSecond());11854LValue PrivLVal = CGF.EmitLValue(FoundE);11855Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(11856PrivLVal.getAddress(),11857CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),11858CGF.ConvertTypeForMem(StructTy));11859LValue BaseLVal =11860CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);11861LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);11862CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(11863CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),11864FiredLVal, llvm::AtomicOrdering::Unordered,11865/*IsVolatile=*/true, /*isInit=*/false);11866return;11867}1186811869// Private address of the lastprivate conditional in the current context.11870// priv_a11871LValue LVal = CGF.EmitLValue(FoundE);11872emitLastprivateConditionalUpdate(CGF, IVLVal, 
UniqueDeclName, LVal,11873FoundE->getExprLoc());11874}1187511876void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(11877CodeGenFunction &CGF, const OMPExecutableDirective &D,11878const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {11879if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())11880return;11881auto Range = llvm::reverse(LastprivateConditionalStack);11882auto It = llvm::find_if(11883Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });11884if (It == Range.end() || It->Fn != CGF.CurFn)11885return;11886auto LPCI = LastprivateConditionalToTypes.find(It->Fn);11887assert(LPCI != LastprivateConditionalToTypes.end() &&11888"Lastprivates must be registered already.");11889SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;11890getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());11891const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());11892for (const auto &Pair : It->DeclToUniqueName) {11893const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());11894if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))11895continue;11896auto I = LPCI->getSecond().find(Pair.first);11897assert(I != LPCI->getSecond().end() &&11898"Lastprivate must be rehistered already.");11899// bool Cmp = priv_a.Fired != 0;11900LValue BaseLVal = std::get<3>(I->getSecond());11901LValue FiredLVal =11902CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));11903llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());11904llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);11905llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");11906llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");11907// if (Cmp) {11908CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);11909CGF.EmitBlock(ThenBB);11910Address Addr = CGF.GetAddrOfLocalVar(VD);11911LValue LVal;11912if (VD->getType()->isReferenceType())11913LVal = CGF.EmitLoadOfReferenceLValue(Addr, 
VD->getType(),11914AlignmentSource::Decl);11915else11916LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),11917AlignmentSource::Decl);11918emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,11919D.getBeginLoc());11920auto AL = ApplyDebugLocation::CreateArtificial(CGF);11921CGF.EmitBlock(DoneBB, /*IsFinal=*/true);11922// }11923}11924}1192511926void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(11927CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,11928SourceLocation Loc) {11929if (CGF.getLangOpts().OpenMP < 50)11930return;11931auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);11932assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&11933"Unknown lastprivate conditional variable.");11934StringRef UniqueName = It->second;11935llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);11936// The variable was not updated in the region - exit.11937if (!GV)11938return;11939LValue LPLVal = CGF.MakeRawAddrLValue(11940GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());11941llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);11942CGF.EmitStoreOfScalar(Res, PrivLVal);11943}1194411945llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(11946CodeGenFunction &CGF, const OMPExecutableDirective &D,11947const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,11948const RegionCodeGenTy &CodeGen) {11949llvm_unreachable("Not supported in SIMD-only mode");11950}1195111952llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(11953CodeGenFunction &CGF, const OMPExecutableDirective &D,11954const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,11955const RegionCodeGenTy &CodeGen) {11956llvm_unreachable("Not supported in SIMD-only mode");11957}1195811959llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(11960const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,11961const VarDecl *PartIDVar, const VarDecl 
*TaskTVar,11962OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,11963bool Tied, unsigned &NumberOfParts) {11964llvm_unreachable("Not supported in SIMD-only mode");11965}1196611967void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,11968SourceLocation Loc,11969llvm::Function *OutlinedFn,11970ArrayRef<llvm::Value *> CapturedVars,11971const Expr *IfCond,11972llvm::Value *NumThreads) {11973llvm_unreachable("Not supported in SIMD-only mode");11974}1197511976void CGOpenMPSIMDRuntime::emitCriticalRegion(11977CodeGenFunction &CGF, StringRef CriticalName,11978const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,11979const Expr *Hint) {11980llvm_unreachable("Not supported in SIMD-only mode");11981}1198211983void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,11984const RegionCodeGenTy &MasterOpGen,11985SourceLocation Loc) {11986llvm_unreachable("Not supported in SIMD-only mode");11987}1198811989void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,11990const RegionCodeGenTy &MasterOpGen,11991SourceLocation Loc,11992const Expr *Filter) {11993llvm_unreachable("Not supported in SIMD-only mode");11994}1199511996void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,11997SourceLocation Loc) {11998llvm_unreachable("Not supported in SIMD-only mode");11999}1200012001void CGOpenMPSIMDRuntime::emitTaskgroupRegion(12002CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,12003SourceLocation Loc) {12004llvm_unreachable("Not supported in SIMD-only mode");12005}1200612007void CGOpenMPSIMDRuntime::emitSingleRegion(12008CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,12009SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,12010ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,12011ArrayRef<const Expr *> AssignmentOps) {12012llvm_unreachable("Not supported in SIMD-only mode");12013}1201412015void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,12016const RegionCodeGenTy 
&OrderedOpGen,12017SourceLocation Loc,12018bool IsThreads) {12019llvm_unreachable("Not supported in SIMD-only mode");12020}1202112022void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,12023SourceLocation Loc,12024OpenMPDirectiveKind Kind,12025bool EmitChecks,12026bool ForceSimpleCall) {12027llvm_unreachable("Not supported in SIMD-only mode");12028}1202912030void CGOpenMPSIMDRuntime::emitForDispatchInit(12031CodeGenFunction &CGF, SourceLocation Loc,12032const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,12033bool Ordered, const DispatchRTInput &DispatchValues) {12034llvm_unreachable("Not supported in SIMD-only mode");12035}1203612037void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,12038SourceLocation Loc) {12039llvm_unreachable("Not supported in SIMD-only mode");12040}1204112042void CGOpenMPSIMDRuntime::emitForStaticInit(12043CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,12044const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {12045llvm_unreachable("Not supported in SIMD-only mode");12046}1204712048void CGOpenMPSIMDRuntime::emitDistributeStaticInit(12049CodeGenFunction &CGF, SourceLocation Loc,12050OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {12051llvm_unreachable("Not supported in SIMD-only mode");12052}1205312054void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,12055SourceLocation Loc,12056unsigned IVSize,12057bool IVSigned) {12058llvm_unreachable("Not supported in SIMD-only mode");12059}1206012061void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,12062SourceLocation Loc,12063OpenMPDirectiveKind DKind) {12064llvm_unreachable("Not supported in SIMD-only mode");12065}1206612067llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,12068SourceLocation Loc,12069unsigned IVSize, bool IVSigned,12070Address IL, Address LB,12071Address UB, Address ST) {12072llvm_unreachable("Not supported in SIMD-only 
mode");12073}1207412075void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,12076llvm::Value *NumThreads,12077SourceLocation Loc) {12078llvm_unreachable("Not supported in SIMD-only mode");12079}1208012081void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,12082ProcBindKind ProcBind,12083SourceLocation Loc) {12084llvm_unreachable("Not supported in SIMD-only mode");12085}1208612087Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,12088const VarDecl *VD,12089Address VDAddr,12090SourceLocation Loc) {12091llvm_unreachable("Not supported in SIMD-only mode");12092}1209312094llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(12095const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,12096CodeGenFunction *CGF) {12097llvm_unreachable("Not supported in SIMD-only mode");12098}1209912100Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(12101CodeGenFunction &CGF, QualType VarType, StringRef Name) {12102llvm_unreachable("Not supported in SIMD-only mode");12103}1210412105void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,12106ArrayRef<const Expr *> Vars,12107SourceLocation Loc,12108llvm::AtomicOrdering AO) {12109llvm_unreachable("Not supported in SIMD-only mode");12110}1211112112void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,12113const OMPExecutableDirective &D,12114llvm::Function *TaskFunction,12115QualType SharedsTy, Address Shareds,12116const Expr *IfCond,12117const OMPTaskDataTy &Data) {12118llvm_unreachable("Not supported in SIMD-only mode");12119}1212012121void CGOpenMPSIMDRuntime::emitTaskLoopCall(12122CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,12123llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,12124const Expr *IfCond, const OMPTaskDataTy &Data) {12125llvm_unreachable("Not supported in SIMD-only mode");12126}1212712128void CGOpenMPSIMDRuntime::emitReduction(12129CodeGenFunction &CGF, 
SourceLocation Loc, ArrayRef<const Expr *> Privates,12130ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,12131ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {12132assert(Options.SimpleReduction && "Only simple reduction is expected.");12133CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,12134ReductionOps, Options);12135}1213612137llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(12138CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,12139ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {12140llvm_unreachable("Not supported in SIMD-only mode");12141}1214212143void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,12144SourceLocation Loc,12145bool IsWorksharingReduction) {12146llvm_unreachable("Not supported in SIMD-only mode");12147}1214812149void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,12150SourceLocation Loc,12151ReductionCodeGen &RCG,12152unsigned N) {12153llvm_unreachable("Not supported in SIMD-only mode");12154}1215512156Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,12157SourceLocation Loc,12158llvm::Value *ReductionsPtr,12159LValue SharedLVal) {12160llvm_unreachable("Not supported in SIMD-only mode");12161}1216212163void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,12164SourceLocation Loc,12165const OMPTaskDataTy &Data) {12166llvm_unreachable("Not supported in SIMD-only mode");12167}1216812169void CGOpenMPSIMDRuntime::emitCancellationPointCall(12170CodeGenFunction &CGF, SourceLocation Loc,12171OpenMPDirectiveKind CancelRegion) {12172llvm_unreachable("Not supported in SIMD-only mode");12173}1217412175void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,12176SourceLocation Loc, const Expr *IfCond,12177OpenMPDirectiveKind CancelRegion) {12178llvm_unreachable("Not supported in SIMD-only mode");12179}1218012181void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(12182const 
OMPExecutableDirective &D, StringRef ParentName,12183llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,12184bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {12185llvm_unreachable("Not supported in SIMD-only mode");12186}1218712188void CGOpenMPSIMDRuntime::emitTargetCall(12189CodeGenFunction &CGF, const OMPExecutableDirective &D,12190llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,12191llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,12192llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,12193const OMPLoopDirective &D)>12194SizeEmitter) {12195llvm_unreachable("Not supported in SIMD-only mode");12196}1219712198bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {12199llvm_unreachable("Not supported in SIMD-only mode");12200}1220112202bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {12203llvm_unreachable("Not supported in SIMD-only mode");12204}1220512206bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {12207return false;12208}1220912210void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,12211const OMPExecutableDirective &D,12212SourceLocation Loc,12213llvm::Function *OutlinedFn,12214ArrayRef<llvm::Value *> CapturedVars) {12215llvm_unreachable("Not supported in SIMD-only mode");12216}1221712218void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,12219const Expr *NumTeams,12220const Expr *ThreadLimit,12221SourceLocation Loc) {12222llvm_unreachable("Not supported in SIMD-only mode");12223}1222412225void CGOpenMPSIMDRuntime::emitTargetDataCalls(12226CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,12227const Expr *Device, const RegionCodeGenTy &CodeGen,12228CGOpenMPRuntime::TargetDataInfo &Info) {12229llvm_unreachable("Not supported in SIMD-only mode");12230}1223112232void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(12233CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,12234const Expr 
*Device) {12235llvm_unreachable("Not supported in SIMD-only mode");12236}1223712238void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,12239const OMPLoopDirective &D,12240ArrayRef<Expr *> NumIterations) {12241llvm_unreachable("Not supported in SIMD-only mode");12242}1224312244void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,12245const OMPDependClause *C) {12246llvm_unreachable("Not supported in SIMD-only mode");12247}1224812249void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,12250const OMPDoacrossClause *C) {12251llvm_unreachable("Not supported in SIMD-only mode");12252}1225312254const VarDecl *12255CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,12256const VarDecl *NativeParam) const {12257llvm_unreachable("Not supported in SIMD-only mode");12258}1225912260Address12261CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,12262const VarDecl *NativeParam,12263const VarDecl *TargetParam) const {12264llvm_unreachable("Not supported in SIMD-only mode");12265}122661226712268