GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
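
  // For illustration (a sketch, not emitted verbatim): for an untied task,
  // Enter() and emitUntiedSwitch() above build a dispatch of roughly this
  // shape, adding one case per scheduling point as the body is emitted:
  //
  //   switch (*part_id) {
  //   default: goto .untied.done.;  // exits through the return block
  //   case 0:  goto .untied.jmp.0;  // initial entry
  //   case 1:  goto .untied.jmp.1;  // resume after the first task yield
  //   }
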
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
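
// A minimal usage sketch for InlinedOpenMPRegionRAII (illustrative; the real
// call sites appear later in this file):
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined-region CapturedStmtInfo
//   } // destructor restores the previous CapturedStmtInfo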

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
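
// For illustration: the flags value attached to the implicit barrier at the
// end of a worksharing 'for' combines the c-style ident flag with the barrier
// kind, i.e. OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR (0x02 | 0x40).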

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
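
// For illustration: a populated ident_t for a construct at file.c:10:3 inside
// foo() would look roughly like
//   { 0, OMP_IDENT_KMPC, 0, 0, ";file.c;foo;10;3;;" }
// with the psource string assembled the same way as in
// getIdentStringFromSourceLocation() later in this file.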

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
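
// For illustration: a 'schedule(nonmonotonic: dynamic, 4)' clause would be
// encoded for the runtime as
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic  // 35 | (1 << 30)
// with the chunk size of 4 passed as a separate argument to the dispatch
// init call.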

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
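
// For illustration: with '#pragma omp declare reduction(my_add : T : ...)' and
// a clause 'reduction(my_add : x)', the reduction op is a call whose callee is
// an OpaqueValueExpr referring back to the OMPDeclareReductionDecl, which the
// cast chain above unwraps; a built-in 'reduction(+ : x)' yields nullptr here.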

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
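
// For illustration: the loop emitted above corresponds to this C-like sketch
// (src is only traversed for declare-reduction initializers):
//
//   T *dest = dest_begin, *src = src_begin;
//   if (dest != dest_begin + num_elements) {
//     do {
//       init_one_element(dest, src); // UDR init or default initializer
//       ++dest; if (src) ++src;
//     } while (dest != dest_begin + num_elements);
//   }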

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if 'omp requires
  // unified_shared_memory' was specified.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
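
// For illustration: given
//   #pragma omp declare reduction(plus : S : omp_out.val += omp_in.val)
// the combiner emitted above behaves roughly like
//   void .omp_combiner.(S *__restrict omp_out, S *__restrict omp_in) {
//     omp_out->val += omp_in->val;
//   }
// with 'omp_out' and 'omp_in' privatized to the parameters' pointees.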

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}
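
// For illustration: with the usual host separators ('.'), an outlined parallel
// region in a function 'foo' gets a helper named "foo.omp_outlined"; LLVM
// appends numeric suffixes if that name is already taken.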

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
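
// For illustration: for a directive at line 42, column 9 of /tmp/a.c inside
// main(), the buffer built above contains ";/tmp/a.c;main;42;9;;", matching
// the semicolon-separated psource layout documented with ident_t above.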

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region; we need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}
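
// A sketch of what the non-TLS threadprivate access below lowers to for a
// variable "x" (illustrative):
//   %addr = call ptr @__kmpc_threadprivate_cached(ptr @loc, i32 %gtid,
//               ptr @x, i64 <sizeof(x)>, ptr @x.cache)
// i.e. the runtime returns the address of the calling thread's copy.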
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
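
// For a C++ threadprivate variable with a dynamic initializer or a
// destructor, the function below synthesizes helper functions and, when no
// CodeGenFunction is supplied, wraps the registration in a global
// initializer. Roughly (a sketch with illustrative names; the real helpers
// are emitted through CodeGenFunction):
//   void *__kmpc_global_ctor_(void *dst) { new (dst) T(init); return dst; }
//   void __kmpc_global_dtor_(void *dst) { ((T *)dst)->~T(); }
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc);
//     __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/0, dtor);
//   }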
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. This must be NULL:
    // the parameter is reserved by the runtime, which currently asserts that
    // it is always NULL.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
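
// emitIfClause tries to constant-fold the if-clause condition first: e.g.
// "if(1)" emits only ThenGen and "if(0)" only ElseGen, with no branch at
// all. Otherwise it emits the usual omp_if.then/omp_if.else/omp_if.end
// diamond.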
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
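
// A sketch of the two lowerings below for "#pragma omp parallel if(cond)"
// (illustrative):
//   if (cond) { // ThenGen
//     __kmpc_fork_call(&loc, <n>, (kmpc_micro)outlined, <captured vars...>);
//   } else {    // ElseGen: run the region serially on the current thread
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &zero_bound, <captured vars...>);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }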
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, i.e. in a regular serial code
// region, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
// and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
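
// A sketch of the lowering below for "#pragma omp critical(name) hint(h)"
// (illustrative; the lock variable name follows getCriticalRegionLock):
//   __kmpc_critical_with_hint(&loc, gtid, &gomp_critical_user_name.var, h);
//   <body>;
//   __kmpc_end_critical(&loc, gtid, &gomp_critical_user_name.var);
// Without a hint clause, plain __kmpc_critical is called instead.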
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // this case choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
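
// A sketch of the lowering below for an explicit "#pragma omp barrier"
// (illustrative):
//   __kmpc_barrier(&loc, gtid); // loc carries OMP_IDENT_BARRIER_EXPL
// Inside a cancellable region, __kmpc_cancel_barrier is called instead and a
// nonzero result branches to the construct's cancellation exit.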
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}
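
// The schedule value handed to the runtime below is the schedule enumerator
// bit-or'ed with an optional monotonicity modifier bit, e.g. (illustrative):
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic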
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
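
// A sketch of the dispatch-init lowering below for
// "#pragma omp for schedule(dynamic, 4)" with a 32-bit signed IV
// (illustrative values):
//   __kmpc_dispatch_init_4(&loc, gtid, <schedule>, /*lb=*/0, /*ub=*/N-1,
//                          /*stride=*/1, /*chunk=*/4);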
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}
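
// For a statically scheduled loop the runtime is consulted once, up front:
// e.g. "#pragma omp for schedule(static)" with a 32-bit signed IV lowers
// roughly to (a sketch):
//   __kmpc_for_static_init_4(&loc, gtid, <schedule>, &last, &lb, &ub,
//                            &stride, /*incr=*/1, /*chunk=*/1);
// after which each thread iterates over its own [lb, ub] sub-range.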
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause, use the default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             /*IsGPUDistribute=*/false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
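
// A sketch of the clause lowerings below (illustrative): "num_threads(8)"
// becomes
//   __kmpc_push_num_threads(&loc, gtid, 8);
// and a proc_bind clause becomes
//   __kmpc_push_proc_bind(&loc, gtid, <proc-bind enum value>);
// both typically emitted just before the corresponding parallel region is
// started.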
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
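
// "#pragma omp flush" lowers to a single runtime call (a sketch):
//   __kmpc_flush(&loc);
// Note that the flushed-variable list parameter is ignored by this lowering.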
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2794
2795
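// For reference, a sketch of how these indices line up with the layout built
// in createKmpTaskTRecordDecl below (names are illustrative):
// \code
// struct kmp_task_t {
//   void *shareds;               // KmpTaskTShareds
//   kmp_routine_entry_t routine; // KmpTaskTRoutine
//   kmp_int32 part_id;           // KmpTaskTPartId
//   kmp_cmplrdata_t data1;       // Data1 (destructors)
//   kmp_cmplrdata_t data2;       // Data2 (priority)
//   // Taskloop directives only:
//   kmp_uint64 lb;               // KmpTaskTLowerBound
//   kmp_uint64 ub;               // KmpTaskTUpperBound
//   kmp_int64 st;                // KmpTaskTStride
//   kmp_int32 liter;             // KmpTaskTLastIter
//   void *reductions;            // KmpTaskTReductions
// };
// \endcode
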
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

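// Illustrative example (hypothetical variable name): a local declared as
// \code
// int x;
// #pragma omp allocate(x) allocator(omp_large_cap_mem_alloc)
// \endcode
// carries an OMPAllocateDeclAttr with a non-default allocator, so
// isAllocatableDecl() returns true and the privates record below stores a
// pointer to the runtime-allocated storage rather than the value itself.
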
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

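// Hedged illustration (hypothetical source): for
// \code
// int a; double b[10];
// #pragma omp task firstprivate(a, b)
// \endcode
// the map function above receives the packed .kmp_privates.t record plus one
// <ty> ** out-parameter per private, and writes back the address of each
// field so the outlined task body can address its private copies directly.
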
/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if the duplication (task_dup) function is required for taskloops,
/// i.e. whether any private copy needs non-trivial initialization.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

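// Hedged sketch of the loop nest this scope builds for an iterator modifier
// such as 'depend(iterator(i = 0:n), in : a[i])':
// \code
// size_t counter = 0;
// cont:
//   if (!(counter < n)) goto exit;
//   i = begin + counter * step;    // HelperData.Update
//   ... fill one element in the body ...
//   counter = counter + 1;         // HelperData.CounterUpdate (in the dtor)
//   goto cont;
// exit:;
// \endcode
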
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

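// Illustrative summary of the three cases above: for a shaping expression
// '([n][m])p' the size is sizeof(*p) * n * m; for an array section 'a[l:n]'
// it is the byte distance from &a[l] to one past the section's last element;
// for any other expression it is simply the size of the expression's type.
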
/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds the
/// flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    //     kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
    //     kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

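// Hedged end-to-end sketch (hypothetical source) of what emitTaskInit
// produces for
// \code
// #pragma omp task firstprivate(x) priority(2)
// { ... }
// \endcode
// conceptually:
// \code
// kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid,
//     /*flags=*/TiedFlag | PriorityFlag, sizeof(kmp_task_t_with_privates),
//     sizeof(shareds), .omp_task_entry.);
// // ...copy shareds, run emitPrivatesInit on the privates record,
// // then store 2 into data2 (the priority field).
// \endcode
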
/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

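// For reference, a sketch of the record built above (field order mirrors the
// addFieldToRecordDecl calls; names are illustrative):
// \code
// struct kmp_depend_info {
//   intptr_t base_addr;
//   size_t len;
//   unsigned char flags; // unsigned integer of bool width, see FlagsTy
// };
// \endcode
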
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

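// Illustrative note on the layout assumed above: a depobj handle points one
// element past a hidden header entry of the kmp_depend_info array, and that
// header reuses base_addr to store the number of dependencies — which is
// exactly what the GEP with index -1 reads back.
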
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

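// For illustration: a clause such as
//   #pragma omp task depend(in : a, b)
// reaches emitDependData with Data.DepExprs = {a, b} and fills two
// consecutive kmp_depend_info records (address, size, and the translated
// 'in' flag); the Pos union selects a compile-time counter for the regular
// case, or a runtime counter lvalue for iterator-expanded dependences.
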
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

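// The sizes returned above are runtime values: each depobj's element count
// is read from its hidden counter record, so the total size of the merged
// dependence array in emitDependClause can only be computed at run time.
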
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // Memcpy the dependency data from the depobj storage.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);

      // Increase pos.
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

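// Together with emitDepobjElementsSizes above, this implements the merge of
// depobj dependences into a task's dependence array: the element counts are
// gathered first to size the array, then the records are memcpy'd in bulk
// here.
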
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

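// Worked example (illustrative): for
//   #pragma omp task depend(in : x) depend(iterator(i = 0:n), out : a[i])
// the constant-position loop above emits the single record for 'x', the
// second loop appends the n iterator-expanded records for 'a[i]' through the
// runtime counter in PosLVal, and the returned NumOfElements is the runtime
// sum 1 + n.
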
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

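// For illustration: for
//   omp_depend_t o;
//   #pragma omp depobj(o) depend(inout : x)
// this allocates <ndeps> + 1 records via __kmpc_alloc, stores <ndeps> in the
// leading counter record, and returns the address of record 1, which is what
// gets stored into 'o'.
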
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
                                            C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

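// This handles '#pragma omp depobj(o) destroy': the GEP by -1 above rewinds
// to the hidden counter record, i.e. to the address originally returned by
// __kmpc_alloc, before handing the allocation back to __kmpc_free.
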
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

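// This handles '#pragma omp depobj(o) update(<kind>)': the loop above walks
// all stored records and rewrites only their flags field, which is why the
// counter record and the base_addr/len fields stay untouched.
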
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

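// Roughly, for '#pragma omp task if(c) depend(...)' the code above produces
// (sketch; new_task comes from emitTaskInit):
//
//   if (c) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps, 0, null);
//   } else {
//     __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, deps, 0, null, nowait);
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task); // executed undeferred in place
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }
//
// Without dependences the then-branch collapses to __kmpc_omp_task.
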
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

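// For example, '#pragma omp taskloop grainsize(4)' reaches the call above
// with sched = Grainsize and the grainsize value 4, while a 'num_tasks(8)'
// clause selects NumTasks instead; with neither clause the runtime picks the
// chunking itself (NoSchedule).
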
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression, emit it
/// as is; otherwise treat it as the combiner of a UDR declaration and emit it
/// as a call to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for the reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  // ...
  // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  // *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  // ...
  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  // ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  // ...
  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  // ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

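// For example, '#pragma omp parallel for reduction(+ : sum)' reaches this
// function with one element per array, where ReductionOps[0] is roughly the
// Sema-built combiner 'sum = sum + <private copy>'; __kmpc_reduce then
// selects case 1 (combine under the lock / tree reduction) or case 2
// (atomic combine) at run time.
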
/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and make them available to those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

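// Illustrative sketch (not part of the original source): for a construct such
// as
//   #pragma omp taskgroup task_reduction(+ : sum)
// the loop above fills one kmp_taskred_input_t entry describing 'sum', and the
// emitted code ends in a call like
//   void *tg = __kmpc_taskred_init(gtid, /*num_data=*/1, /*data=*/.rd_input.);
// whose result is later passed as the 'tg' argument of
// __kmpc_task_reduction_get_th_data.
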
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate global variable if the size is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build the call void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32
      // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait), used when dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

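// Illustrative sketch (not part of the original source):
//   #pragma omp taskwait depend(in : x)
// takes the dependence branch above and emits
//   __kmpc_omp_taskwait_deps_51(loc, gtid, /*ndeps=*/1, dep_list,
//                               /*ndeps_noalias=*/0,
//                               /*noalias_dep_list=*/nullptr,
//                               /*has_no_wait=*/0);
// while a plain '#pragma omp taskwait' lowers to
//   __kmpc_omp_taskwait(loc, gtid);
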
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

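// Illustrative mapping (not part of the original source): for
//   #pragma omp cancel sections
// getCancellationKind(OMPD_sections) yields CancelSections (3), which is the
// cncl_kind argument passed to __kmpc_cancel/__kmpc_cancellationpoint below.
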
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

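// Illustrative sketch (not part of the original source): for a hypothetical
// user-defined allocator 'my_alloc' with a traits array 'my_traits[N]',
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// the code above emits, on entry to the region,
//   my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/NULL, N, &my_traits);
// and emitUsesAllocatorsFini below emits the matching
//   __kmpc_destroy_allocator(gtid, my_alloc);
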
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple meaningful children - cannot return a single child.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}

/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an
/// if-clause), store the condition in \p CondVal. If \p E, and \p CondVal
/// respectively, are nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle the if clause. If an if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of the num_threads clause iff the if clause was not
    // specified or did not evaluate to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread-limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}

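// Illustrative sketch (not part of the original source): for a region such as
//   #pragma omp target
//   #pragma omp parallel if(c) num_threads(n)
// a constant-false 'c' pins UpperBound to 1 and stops the analysis; otherwise
// CondVal receives the emitted value of 'c' and, when 'n' folds to a
// constant, UpperBound is updated from it as described above.
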
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread-limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear on how to resolve two thread limit
    // clauses; pick the teams one if present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression has already been
    // handled above.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0", which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle the if clause. If an if clause is present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If both the thread limit and the num threads expression were present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}

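// Illustrative sketch (not part of the original source): for
//   #pragma omp target parallel num_threads(n) thread_limit(m) if(c)
// with non-constant 'n', 'm', and 'c', the selects above compute
//   threads = min(m, c ? n : 1)
// and a result of "0" is left to mean "let the runtime choose".
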
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

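  // Illustrative example (not part of the original source): the loop above
  // counts the trailing zero bits of OMP_MAP_MEMBER_OF. Assuming, for the
  // sake of illustration, that the flag occupied the high 16 bits of a 64-bit
  // mask (0xffff000000000000), the returned offset would be 48, i.e. the
  // shift amount needed to place a member index into the MEMBER_OF field.
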
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expressions.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base type) - lb * sizeof(element type), clamped to zero.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

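  // Illustrative examples (not part of the original source), assuming
  // 'float *p' and 'int i[100]' as in the mapping examples further below:
  //   p[1:24] -> 24 * sizeof(float)              (explicit length path)
  //   i[:]    -> sizeof(i) = 100 * sizeof(int)   (whole-base path)
  //   i[5:]   -> sizeof(i) - 5 * sizeof(int)     (subtraction path, clamped
  //                                               at zero)
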
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }

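  // Illustrative example (not part of the original source): for
  //   map(always, tofrom: x)
  // the returned bits are OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS, plus
  // OMP_MAP_TARGET_PARAM when AddIsTargetParamFlag is set for the capture.
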
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

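  // Illustrative example (not part of the original source): for 'int a[10]',
  // 'a[2:1]' has a provably unit length and is not final, while 'a[0:n]' with
  // a non-constant 'n' is conservatively treated as final.
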
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt,
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6985
//
6986
// map(to: ps->ps->ps->s.f[:22])
6987
// ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6988
// ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6989
// &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6990
// &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6991
//
6992
// map(to: s.f[:22]) map(from: s.p[:33])
6993
// &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6994
// sizeof(double*) (**), TARGET_PARAM
6995
// &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6996
// &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6997
// &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6998
// (*) allocate contiguous space needed to fit all mapped members even if
6999
// we allocate space for members not mapped (in this example,
7000
// s.f[22..49] and s.s are not mapped, yet we must allocate space for
7001
// them as well because they fall between &s.f[0] and &s.p)
7002
//
7003
// map(from: s.f[:22]) map(to: ps->p[:33])
7004
// &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7005
// ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7006
// ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7007
// &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7008
// (*) the struct this entry pertains to is the 2nd element in the list of
7009
// arguments, hence MEMBER_OF(2)
7010
//
7011
// map(from: s.f[:22], s.s) map(to: ps->p[:33])
7012
// &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7013
// &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7014
// &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7015
// ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7016
// ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7017
// &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7018
// (*) the struct this entry pertains to is the 4th element in the list
7019
// of arguments, hence MEMBER_OF(4)
7020
//
7021
// map(p, p[:100])
7022
// ===> map(p[:100])
7023
// &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7024
7025
    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
      return;
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (!AreBothBasePtrAndPteeMapped &&
            (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
             !VD || VD->hasLocalStorage()))
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    // We need to check if we will be encountering any MemberExprs. If we do
    // not encounter any MemberExpr it means we will be mapping the whole
    // struct. In that case we need to skip adding an entry for the struct to
    // the CombinedInfo list and instead add an entry to the
    // StructBaseCombinedInfo list only when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }
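
    // Illustrative note (not from the original source): for map(s) no
    // component is a MemberExpr, so IsMappingWholeStruct stays true and the
    // struct's entry goes to StructBaseCombinedInfo; for map(s.i) the
    // MemberExpr component makes it false and entries go to CombinedInfo.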

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping, use that; otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress();
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress();
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress();
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do a bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress();
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress();
                }
                llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
                llvm::Value *LBPtr = LB.emitRawPointer(CGF);
                Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
                                                 LBPtr);
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          llvm::Value *LBPtr = LB.emitRawPointer(CGF);
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
              LBPtr);
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
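
        // Illustrative note (not from the original source): the overlapped
        // branch above handles combinations such as map(s) together with a
        // mapping of one of s's members; the struct is then copied as
        // bitwise pieces around the overlapped members rather than as one
        // contiguous block.
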
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        // Skip adding an entry in the CurInfo of this combined entry if the
        // whole struct is currently being mapped. The struct needs to be
        // added in the first position before any data internal to the struct
        // is mapped.
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          if (!IsMappingWholeStruct) {
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
          } else {
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            StructBaseCombinedInfo.BasePointers.push_back(
                BP.emitRawPointer(CGF));
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
                IsNonContiguous ? DimSize : 1);
          }

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          if (!IsMappingWholeStruct)
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          else
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                               : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates to the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData || IsMemberReference,
                             AreBothBasePtrAndPteeMapped ||
                                 (IsCaptureFirstInfo && !RequiresReference),
                             IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          if (!IsMappingWholeStruct)
            CombinedInfo.Types.push_back(Flags);
          else
            StructBaseCombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran into the whole component, allocate space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, we need to initialize the first
    // dimension size as 1, the first offset as 0, and the first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value, except for the last dimension, since we
      // don't need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous transfers. Notice that offset, count,
    // and stride are only meaningful for array sections, so we insert a null
    // for anything other than an array section.
    // Also, the sizes of offsets, counts, and strides are not the same as
    // those of pointers, base_pointers, sizes, or dims. Instead, they equal
    // the number of non-contiguous declarations in the target update to/from
    // clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, we construct
        // all the lower dimensions as array sections; however, for a case
        // like arr[0:2][2], Clang constructs the inner dimension as an array
        // section, but it actually is not in array-section form according to
        // the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //        Offset  Count  Stride
      //    D0    0       1      4    (int)  <- dummy dimension
      //    D1    0       2      8    (2 * (1) * 4)
      //    D2    1       2      20   (1 * (1 * 5) * 4)
      //    D3    0       2      200  (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }
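
  // Illustrative note (not from the original source): for
  //   #pragma omp target update to(arr[0:2:2][1:2:1][0:2:2])
  // the offsets/counts/strides collected above describe each dimension of
  // the strided section, and the runtime uses these descriptors to transfer
  // the non-contiguous slices.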

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first-private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
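
  // Illustrative note (not from the original source): for
  //   #pragma omp target firstprivate(p, x)   // int *p; int x;
  // the pointer capture p gets TO | PTR_AND_OBJ above, the non-pointer
  // capture x gets PRIVATE | TO, and captures that are neither firstprivate
  // nor mapped lambdas default to TO | FROM.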

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(CGF.getContext(), Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
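
  // Illustrative note (not from the original source): for
  //   struct A { int a; }; struct B : A { int b; double c; };
  // getPlainLayout on B yields [A::a, B::b, B::c]: non-virtual bases are
  // flattened first, then fields in LLVM field order, with empty bases and
  // bitfields skipped.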

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate to the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };
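
    // Illustrative note (not from the original source): Info buckets the
    // component lists per declaration as {Present, Allocs, Other}, so when
    // the buckets are walked in order below, 'present' maps for a
    // declaration are emitted before its 'alloc' and remaining maps,
    // regardless of clause order on the directive.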

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and a zero-size section. It is the user's
    // fault if that was not mapped before. If there is no map information and
    // the pointer is a struct member, then we defer the emission of that
    // entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };
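
    // Illustrative note (not from the original source): for
    //   #pragma omp target data use_device_ptr(p)   // with no map of p
    // the helper above records a zero-size RETURN_PARAM entry for p, whose
    // pointer the runtime later rewrites with the corresponding device
    // address (it is the user's responsibility that p was mapped before).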

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information; generate a
          // zero-size array section. If the pointer is a struct member, we
          // defer this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and a
    // zero-size section. It is the user's fault if that was not mapped
    // before. If there is no map information and the pointer is a struct
    // member, then we defer the emission of that entry until the whole struct
    // has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }
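
    // Illustrative note (not from the original source): when the
    // use_device_ptr/use_device_addr operand is a struct member with no
    // existing map entry, its RETURN_PARAM entry was deferred above (via
    // DeferredInfo) and is only emitted in the loop below, after the
    // enclosing struct has been processed.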

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      bool HasMapBasePtr = false;
      bool HasMapArraySec = false;
      if (VD && VD->getType()->isAnyPointerType()) {
        for (const auto &M : Data.second) {
          HasMapBasePtr = any_of(M, [](const MapInfo &L) {
            return isa_and_present<DeclRefExpr>(L.VarRef);
          });
          HasMapArraySec = any_of(M, [](const MapInfo &L) {
            return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
                L.VarRef);
          });
          if (HasMapBasePtr && HasMapArraySec)
            break;
        }
      }
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef, /*OverlappedElements*/ std::nullopt,
              HasMapBasePtr && HasMapArraySec);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration, then work
            // on the first new entry added to it, i.e., make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

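  // Illustrative note (not from the original source): for
  //   #pragma omp target firstprivate(a) map(to: l)   // l is a lambda
  // the first constructor records 'a' in FirstPrivateDecls and 'l' in
  // LambdasMap, which getMapModifiersForPrivateClauses consults above.
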
/// Generate code for the combined entry if we have a partially mapped struct
8241
/// and take care of the mapping flags of the arguments corresponding to
8242
/// individual struct members.
8243
void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8244
MapFlagsArrayTy &CurTypes,
8245
const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8246
llvm::OpenMPIRBuilder &OMPBuilder,
8247
const ValueDecl *VD = nullptr,
8248
bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a
      // map-type of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // The map type is always TARGET_PARAM when generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
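    // Now map the individual captures: by-reference captures, and by-copy
    // captures of pointer type.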
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
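      // Walk backwards from the current entry to find the enclosing lambda:
      // the nearest preceding entry whose section pointer is the lambda's
      // base address.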
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated with a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }
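
    // Each MapData tuple below holds: (component list, map type, map-type
    // modifiers, is-implicit flag, user-defined mapper, variable-reference
    // expression).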
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not valid if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
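    // Sort the component lists so that entries whose map modifiers include
    // 'present' are processed first and entries whose map type is 'alloc'
    // are processed last. For example, given 'map(present, to: s.x)
    // map(tofrom: s.y) map(alloc: s.z)', the s.x entry is handled first and
    // the s.z entry last.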
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one of the lists, we reached
        // the head of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlap.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ std::nullopt,
            HasMapBasePtr && HasMapArraySec);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };
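
  // Lazily create (or retrieve) the user-defined mapper function for any
  // entry that carries a mapper declaration.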
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate over SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
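  // Shift the number of pre-existing components into the MEMBER_OF bits of
  // the map type, so that the components emitted by this mapper are numbered
  // after them.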
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
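  // In summary, the body is entered when initialization is required
  // (IsInit: size > 1, or base != begin for a PTR_AND_OBJ entry, with the
  // delete bit clear) or when deletion is required (!IsInit: size > 1 with
  // the delete bit set).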
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it serves
  // the memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it's really a target_teams_distribute.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
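  // Without a nested distribute-like directive the trip count cannot be
  // computed, so emit zero.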
9429
if (!TD)
9430
return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9431
9432
const auto *LD = cast<OMPLoopDirective>(TD);
9433
if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9434
return NumIterations;
9435
return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9436
}
9437
9438
static void
9439
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9440
const OMPExecutableDirective &D,
9441
llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9442
bool RequiresOuterTask, const CapturedStmt &CS,
9443
bool OffloadingMandatory, CodeGenFunction &CGF) {
9444
if (OffloadingMandatory) {
9445
CGF.Builder.CreateUnreachable();
9446
} else {
9447
if (RequiresOuterTask) {
9448
CapturedVars.clear();
9449
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9450
}
9451
OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9452
CapturedVars);
9453
}
9454
}
9455
9456
static llvm::Value *emitDeviceID(
9457
llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9458
CodeGenFunction &CGF) {
9459
// Emit device ID if any.
9460
llvm::Value *DeviceID;
9461
if (Device.getPointer()) {
9462
assert((Device.getInt() == OMPC_DEVICE_unknown ||
9463
Device.getInt() == OMPC_DEVICE_device_num) &&
9464
"Expected device_num modifier.");
9465
llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9466
DeviceID =
9467
CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9468
} else {
9469
DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9470
}
9471
return DeviceID;
9472
}
9473
9474
llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9475
CodeGenFunction &CGF) {
9476
llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9477
9478
if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9479
CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9480
llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9481
DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9482
DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9483
/*isSigned=*/false);
9484
}
9485
return DynCGroupMem;
9486
}
9487
9488
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captured because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}
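
// Entry point for '#pragma omp target' codegen. As a hedged illustration of
// the then/else split implemented below, a construct such as
//   #pragma omp target if(use_gpu) device(dev)
// lowers to a runtime test of 'use_gpu' that selects between the
// kernel-launch path (TargetThenGen) and the host-fallback path
// (TargetElseGen).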
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
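
// Recursively walks a statement tree and emits a device entry point for every
// target-execution directive found. The recursion descends through nested
// OpenMP directives (via their raw associated statement) and into lambda
// bodies, so target regions inside lambdas are still discovered.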
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
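
// As a hedged example of the check below: a function declared with
//   #pragma omp declare target device_type(nohost)
// is skipped during host codegen, and one declared with device_type(host) is
// skipped during device codegen.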
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
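
// Registers a global with the offloading machinery. As a hedged example, a
// variable declared between
//   #pragma omp declare target
// and
//   #pragma omp end declare target
// reaches this function, which hands it to
// OMPBuilder.registerTargetGlobalVariable so the host and device copies can
// be tied together through an offloading entry.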
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}
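
// Records the effects of a 'requires' directive on later codegen. As a hedged
// example of what the loop below reacts to:
//   #pragma omp requires unified_shared_memory
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// the first sets HasRequiresUnifiedSharedMemory, the second makes
// getDefaultMemoryOrdering() return SequentiallyConsistent.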
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for variables with static "
                     "storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
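
// Pushes the team count and per-team thread limit to the runtime. As a hedged
// example, for
//   #pragma omp teams num_teams(4) thread_limit(8)
// the code below emits __kmpc_push_num_teams(loc, tid, 4, 8); an absent
// clause is encoded as 0.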
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}
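
// Emits a '#pragma omp target data' region through the OpenMPIRBuilder. A
// hedged summary of the body callback below: with BodyGenTy::Priv and
// ::DupNoPriv the region body is emitted only when device-address captures
// exist (DupNoPriv with privatization disabled via NoPrivAction), while
// ::NoPriv emits it only when there are no such captures.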
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}
10339
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10340
CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10341
const Expr *Device) {
10342
if (!CGF.HaveInsertPoint())
10343
return;
10344
10345
assert((isa<OMPTargetEnterDataDirective>(D) ||
10346
isa<OMPTargetExitDataDirective>(D) ||
10347
isa<OMPTargetUpdateDirective>(D)) &&
10348
"Expecting either target enter, exit data, or update directives.");
10349
10350
CodeGenFunction::OMPTargetDataInfo InputInfo;
10351
llvm::Value *MapTypesArray = nullptr;
10352
llvm::Value *MapNamesArray = nullptr;
10353
// Generate the code for the opening of the data environment.
10354
auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10355
&MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10356
// Emit device ID if any.
10357
llvm::Value *DeviceID = nullptr;
10358
if (Device) {
10359
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10360
CGF.Int64Ty, /*isSigned=*/true);
10361
} else {
10362
DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10363
}
10364
10365
// Emit the number of elements in the offloading arrays.
10366
llvm::Constant *PointerNum =
10367
CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10368
10369
// Source location for the ident struct
10370
llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10371
10372
SmallVector<llvm::Value *, 13> OffloadingArgs(
10373
{RTLoc, DeviceID, PointerNum,
10374
InputInfo.BasePointersArray.emitRawPointer(CGF),
10375
InputInfo.PointersArray.emitRawPointer(CGF),
10376
InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10377
InputInfo.MappersArray.emitRawPointer(CGF)});
10378
10379
// Select the right runtime function call for each standalone
10380
// directive.
10381
const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10382
RuntimeFunction RTLFn;
10383
switch (D.getDirectiveKind()) {
10384
case OMPD_target_enter_data:
10385
RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10386
: OMPRTL___tgt_target_data_begin_mapper;
10387
break;
10388
case OMPD_target_exit_data:
10389
RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10390
: OMPRTL___tgt_target_data_end_mapper;
10391
break;
10392
case OMPD_target_update:
10393
RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10394
: OMPRTL___tgt_target_data_update_mapper;
10395
break;
10396
case OMPD_parallel:
10397
case OMPD_for:
10398
case OMPD_parallel_for:
10399
case OMPD_parallel_master:
10400
case OMPD_parallel_sections:
10401
case OMPD_for_simd:
10402
case OMPD_parallel_for_simd:
10403
case OMPD_cancel:
10404
case OMPD_cancellation_point:
10405
case OMPD_ordered:
10406
case OMPD_threadprivate:
10407
case OMPD_allocate:
10408
case OMPD_task:
10409
case OMPD_simd:
10410
case OMPD_tile:
10411
case OMPD_unroll:
10412
case OMPD_sections:
10413
case OMPD_section:
10414
case OMPD_single:
10415
case OMPD_master:
10416
case OMPD_critical:
10417
case OMPD_taskyield:
10418
case OMPD_barrier:
10419
case OMPD_taskwait:
10420
case OMPD_taskgroup:
10421
case OMPD_atomic:
10422
case OMPD_flush:
10423
case OMPD_depobj:
10424
case OMPD_scan:
10425
case OMPD_teams:
10426
case OMPD_target_data:
10427
case OMPD_distribute:
10428
case OMPD_distribute_simd:
10429
case OMPD_distribute_parallel_for:
10430
case OMPD_distribute_parallel_for_simd:
10431
case OMPD_teams_distribute:
10432
case OMPD_teams_distribute_simd:
10433
case OMPD_teams_distribute_parallel_for:
10434
case OMPD_teams_distribute_parallel_for_simd:
10435
case OMPD_declare_simd:
10436
case OMPD_declare_variant:
10437
case OMPD_begin_declare_variant:
10438
case OMPD_end_declare_variant:
10439
case OMPD_declare_target:
10440
case OMPD_end_declare_target:
10441
case OMPD_declare_reduction:
10442
case OMPD_declare_mapper:
10443
case OMPD_taskloop:
10444
case OMPD_taskloop_simd:
10445
case OMPD_master_taskloop:
10446
case OMPD_master_taskloop_simd:
10447
case OMPD_parallel_master_taskloop:
10448
case OMPD_parallel_master_taskloop_simd:
10449
case OMPD_target:
10450
case OMPD_target_simd:
10451
case OMPD_target_teams_distribute:
10452
case OMPD_target_teams_distribute_simd:
10453
case OMPD_target_teams_distribute_parallel_for:
10454
case OMPD_target_teams_distribute_parallel_for_simd:
10455
case OMPD_target_teams:
10456
case OMPD_target_parallel:
10457
case OMPD_target_parallel_for:
10458
case OMPD_target_parallel_for_simd:
10459
case OMPD_requires:
10460
case OMPD_metadirective:
10461
case OMPD_unknown:
10462
default:
10463
llvm_unreachable("Unexpected standalone target data directive.");
10464
break;
10465
}
10466
if (HasNowait) {
10467
OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10468
OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10469
OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10470
OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10471
}
10472
CGF.EmitRuntimeCall(
10473
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10474
OffloadingArgs);
10475
};
10476
10477
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10478
&MapNamesArray](CodeGenFunction &CGF,
10479
PrePostActionTy &) {
10480
// Fill up the arrays with all the mapped variables.
10481
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10482
10483
// Get map clause information.
10484
MappableExprsHandler MEHandler(D, CGF);
10485
MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10486
10487
CGOpenMPRuntime::TargetDataInfo Info;
10488
// Fill up the arrays and create the arguments.
10489
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10490
/*IsNonContiguous=*/true);
10491
bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10492
D.hasClausesOfKind<OMPNowaitClause>();
10493
bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10494
llvm::codegenoptions::NoDebugInfo;
10495
OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10496
EmitDebug,
10497
/*ForEndCall=*/false);
10498
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10499
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10500
CGF.VoidPtrTy, CGM.getPointerAlign());
10501
InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10502
CGM.getPointerAlign());
10503
InputInfo.SizesArray =
10504
Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10505
InputInfo.MappersArray =
10506
Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10507
MapTypesArray = Info.RTArgs.MapTypesArray;
10508
MapNamesArray = Info.RTArgs.MapNamesArray;
10509
if (RequiresOuterTask)
10510
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10511
else
10512
emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10513
};
10514
10515
if (IfCond) {
10516
emitIfClause(CGF, IfCond, TargetThenGen,
10517
[](CodeGenFunction &CGF, PrePostActionTy &) {});
10518
} else {
10519
RegionCodeGenTy ThenRCG(TargetThenGen);
10520
ThenRCG(CGF);
10521
}
10522
}
10523

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace
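
// A worked example of the VLEN computation implemented below (illustrative,
// not normative): for 'double foo(double x)' with no simdlen clause the CDT
// is the return type double (64 bits), so a 128-bit SSE vector register
// yields VLEN = 128 / 64 = 2, while a 512-bit AVX512 register yields
// VLEN = 512 / 64 = 8.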
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If OpenMP clause "simdlen" is used, the VLEN is the value of the
  // argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
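
// A hedged reading of the mangling implemented below: a vector variant name
// is "_ZGV" + ISA letter + mask + VLEN + one letter per parameter + "_" +
// the scalar function's (possibly mangled) name. For example, for a C
// function 'double foo(double a, double b)' annotated with
// '#pragma omp declare simd uniform(b)' and compiled for SSE ('b', 128-bit
// registers, CDT double, VLEN 2), the notinbranch variant would be named
// _ZGVbN2vu_foo.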

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
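
// A hedged example of the AArch64 mangling produced below: with no simdlen
// clause, the SVE path emits a single masked, scalable variant of the form
// _ZGVsMx<params>_<mangled-name> (ISA 's', mask 'M', VLEN 'x'), while the
// Advanced SIMD path ('n') emits one or two fixed-VLEN variants whose VLEN is
// derived from the NDS as in the helper above.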
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
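
// Attaches vector-variant attributes for every 'declare simd' on a function.
// As a hedged example of the clause processing below:
//   #pragma omp declare simd simdlen(4) uniform(a) linear(b:1) aligned(c:16)
//   void foo(int a, int *b, float *c);
// marks 'a' Uniform, marks 'b' Linear with its step rescaled by sizeof(int),
// and records an alignment of 16 for 'c' before the target-specific emitters
// run.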
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
11112
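// A minimal illustration of the mangling performed above (the declaration and
// names below are hypothetical examples, not emitted verbatim): given
//
//   #pragma omp declare simd simdlen(4) uniform(b) linear(i)
//   double foo(double b, int i);
//
// the x86 path attaches "vector-variant" attributes whose names follow the
// Intel vector function ABI, e.g. _ZGVbN4ul_foo (ISA 'b', unmasked 'N',
// simdlen 4, 'u'niform and 'l'inear parameters), while the AArch64 path
// follows the AAVFABI with ISA letters 'n' (Advanced SIMD) and 's' (SVE).
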
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info cast to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}

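// For illustration, a doacross loop nest such as
//
//   #pragma omp for ordered(2)
//   for (int i = 0; i < n; ++i)
//     for (int j = 0; j < m; ++j) {
//       #pragma omp ordered depend(sink : i - 1, j)
//       work(i, j);
//       #pragma omp ordered depend(source)
//     }
//
// (a sketch; 'work' is a placeholder) is lowered so that the loop prologue
// calls __kmpc_doacross_init with a two-element kmp_dim array built as above,
// each sink clause becomes a __kmpc_doacross_wait call and each source clause
// a __kmpc_doacross_post call (see EmitDoacrossOrdered below), and the cleanup
// pushed here emits the matching __kmpc_doacross_fini when the region is left.
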
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

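// For illustration, a local such as
//
//   double x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)
//
// takes the path above (a sketch of the resulting calls): the size, rounded
// up to the declared alignment, is passed to __kmpc_alloc(gtid, size,
// allocator) -- or to __kmpc_aligned_alloc when an align modifier supplies an
// explicit alignment -- and a cleanup invoking __kmpc_free(gtid, ptr,
// allocator) is pushed so the storage is released on both normal and EH exits.
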
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

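// For illustration, with
//
//   #pragma omp simd nontemporal(a)
//
// the RAII above keeps 'a' on NontemporalDeclsStack while the simd body is
// emitted, and isNontemporalDecl() lets the load/store emitters attach
// !nontemporal metadata to accesses of 'a' within the region.
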
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}

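// For illustration, for 'lastprivate(conditional: a)' the private copy built
// above is wrapped in an implicit record, roughly
//
//   struct lastprivate.conditional { <decltype(a)> a; char Fired; };
//
// Fired starts at 0 and is raised whenever the private copy is assigned
// (atomically when the assignment happens in an inner parallel region), so
// the final copy-out can skip copies that were never updated.
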
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

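// For illustration, in
//
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i)
//     if (p[i])
//       a = i;
//
// (a sketch) every assignment to 'a' funnels through the update above: under
// a critical section named after the variable, the global pair
// (last_iv, last_a) advances only when the current iteration is not older
// than last_iv, so after the loop last_a holds the value written by the
// lexically last updating iteration, as the conditional modifier requires.
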
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

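// CGOpenMPSIMDRuntime backs the -fopenmp-simd mode, in which only 'simd'
// semantics are honored and no libomp calls may be emitted, so every entry
// point below that would require the full runtime traps with llvm_unreachable.
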
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
