Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

namespace llvm {
void initializeCycleInfoWrapperPassPass(PassRegistry &);
} // namespace llvm

using namespace llvm;

static cl::opt<unsigned> KernargPreloadCount(
    "amdgpu-kernarg-preload-count",
    cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
#include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
#include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to
  // access queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
                                                    : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
                                                      : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
        CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}

  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  /// Get code object version.
  unsigned getCodeObjectVersion() const {
    return CodeObjectVersion;
  }

  /// Get the effective value of "amdgpu-waves-per-eu" for the function,
  /// accounting for the interaction with the passed value to use for
  /// "amdgpu-flat-work-group-size".
  std::pair<unsigned, unsigned>
  getWavesPerEU(const Function &F,
                std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getWavesPerEU(F, FlatWorkGroupSize);
  }

  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(const Function &F,
                         std::pair<unsigned, unsigned> WavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);
  }

  unsigned getMaxWavesPerEU(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC || !Visited.insert(OpC).second)
        continue;

      Result |= getConstantAccess(OpC, Visited);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue =
          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
          "true";

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo)
        return false;

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions, these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (!AAEdges || AAEdges->hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

    for (Function *Callee : AAEdges->getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        if (!AAAMD)
          return indicatePessimisticFixpoint();
        *this &= *AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID, COV);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (COV >= 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);
    }

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
      if (!PointerInfoAA)
        return false;

      return PointerInfoAA->forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Base class to derive different size ranges.
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  StringRef AttrName;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {}

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  template <class AttributeImpl>
  ChangeStatus updateImplImpl(Attributor &A) {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo)
        return false;

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
                                         unsigned Max) {
    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},
                           /* ForceReplace */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << getName() << '[';
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }
};

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
    return emitAttributeIfNotDefault(A, Min, Max);
  }

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  bool isValidState() const override {
    return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
            *this, IRPosition::function(*F), DepClassTy::REQUIRED)) {

      unsigned Min, Max;
      std::tie(Min, Max) = InfoCache.getWavesPerEU(
          *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
               AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});

      ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
      intersectKnown(Range);
    }

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
      if (!CallerInfo || !AssumedGroupSize)
        return false;

      unsigned Min, Max;
      std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
          *Caller,
          {CallerInfo->getAssumed().getLower().getZExtValue(),
           CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
          {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
           AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
      ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
      IntegerRangeState CallerRangeState(CallerRange);
      Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Max = InfoCache.getMaxWavesPerEU(*F);
    return emitAttributeIfNotDefault(A, 1, Max);
  }

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAAMDWavesPerEU"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWavesPerEU
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
  llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}

static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
  for (const auto &CI : IA->ParseConstraints()) {
    for (StringRef Code : CI.Codes) {
      Code.consume_front("{");
      if (Code.starts_with("a"))
        return true;
    }
  }

  return false;
}

struct AAAMDGPUNoAGPR
    : public IRAttribute<Attribute::NoUnwind,
                         StateWrapper<BooleanState, AbstractAttribute>,
                         AAAMDGPUNoAGPR> {
  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
                                           Attributor &A) {
    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
      return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
    llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    if (F->hasFnAttribute("amdgpu-no-agpr"))
      indicateOptimisticFixpoint();
  }

  const std::string getAsStr(Attributor *A) const override {
    return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    // TODO: Use AACallEdges, but then we need a way to inspect asm edges.

    auto CheckForNoAGPRs = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Value *CalleeOp = CB.getCalledOperand();
      const Function *Callee = dyn_cast<Function>(CalleeOp);
      if (!Callee) {
        if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
          return !inlineAsmUsesAGPRs(IA);
        return false;
      }

      // Some intrinsics may use AGPRs, but if we have a choice, we are not
      // required to use AGPRs.
      if (Callee->isIntrinsic())
        return true;

      // TODO: Handle callsite attributes
      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      return CalleeInfo && CalleeInfo->getAssumed();
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    if (!getAssumed())
      return ChangeStatus::UNCHANGED;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, "amdgpu-no-agpr")});
  }

  const std::string getName() const override { return "AAAMDGPUNoAGPR"; }
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUNoAGPR
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  static const char ID;
};

const char AAAMDGPUNoAGPR::ID = 0;

static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  for (unsigned I = 0;
       I < F.arg_size() &&
       I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
       ++I) {
    Argument &Arg = *F.getArg(I);
    // Check for incompatible attributes.
    if (Arg.hasByRefAttr() || Arg.hasNestAttr())
      break;

    Arg.addAttr(Attribute::InReg);
  }
}

static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
  SetVector<Function *> Functions;
  for (Function &F : M) {
    if (!F.isIntrinsic())
      Functions.insert(&F);
  }

  CallGraphUpdater CGUpdater;
  BumpPtrAllocator Allocator;
  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
       &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
       &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
       &AAUnderlyingObjects::ID});

  AttributorConfig AC(CGUpdater);
  AC.Allowed = &Allowed;
  AC.IsModulePass = true;
  AC.DefaultInitializeLiveInternals = false;
  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  };

  Attributor A(Functions, InfoCache, AC);

  for (Function &F : M) {
    if (!F.isIntrinsic()) {
      A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
      A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
      A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));
      CallingConv::ID CC = F.getCallingConv();
      if (!AMDGPU::isEntryFunctionCC(CC)) {
        A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
      } else if (CC == CallingConv::AMDGPU_KERNEL) {
        addPreloadKernArgHint(F, TM);
      }
    }
  }

  ChangeStatus Change = A.run();
  return Change == ChangeStatus::CHANGED;
}

class AMDGPUAttributorLegacy : public ModulePass {
public:
  AMDGPUAttributorLegacy() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    AnalysisGetter AG(this);
    return runImpl(M, AG, *TM);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<CycleInfoWrapperPass>();
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
                                                  ModuleAnalysisManager &AM) {

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  AnalysisGetter AG(FAM);

  // TODO: Probably preserves CFG
  return runImpl(M, AG, TM) ? PreservedAnalyses::none()
                            : PreservedAnalyses::all();
}

char AMDGPUAttributorLegacy::ID = 0;

Pass *llvm::createAMDGPUAttributorLegacyPass() {
  return new AMDGPUAttributorLegacy();
}
INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass);
INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
                    false, false)