CoCalc -- PassBuilderPipelines.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
³⁵²⁶² views
1
//===- Construction of pass pipelines -------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
/// \file
9
///
10
/// This file provides the implementation of the PassBuilder based on our
11
/// static pass registry as well as related functionality. It also provides
12
/// helpers to aid in analyzing, debugging, and testing passes and pass
13
/// pipelines.
14
///
15
//===----------------------------------------------------------------------===//
16

17
#include "llvm/ADT/Statistic.h"
18
#include "llvm/Analysis/AliasAnalysis.h"
19
#include "llvm/Analysis/BasicAliasAnalysis.h"
20
#include "llvm/Analysis/CGSCCPassManager.h"
21
#include "llvm/Analysis/GlobalsModRef.h"
22
#include "llvm/Analysis/InlineAdvisor.h"
23
#include "llvm/Analysis/ProfileSummaryInfo.h"
24
#include "llvm/Analysis/ScopedNoAliasAA.h"
25
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
26
#include "llvm/IR/PassManager.h"
27
#include "llvm/Passes/OptimizationLevel.h"
28
#include "llvm/Passes/PassBuilder.h"
29
#include "llvm/Support/CommandLine.h"
30
#include "llvm/Support/ErrorHandling.h"
31
#include "llvm/Support/PGOOptions.h"
32
#include "llvm/Support/VirtualFileSystem.h"
33
#include "llvm/Target/TargetMachine.h"
34
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
35
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
36
#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
37
#include "llvm/Transforms/Coroutines/CoroEarly.h"
38
#include "llvm/Transforms/Coroutines/CoroElide.h"
39
#include "llvm/Transforms/Coroutines/CoroSplit.h"
40
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
41
#include "llvm/Transforms/IPO/AlwaysInliner.h"
42
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
43
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
44
#include "llvm/Transforms/IPO/Attributor.h"
45
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
46
#include "llvm/Transforms/IPO/ConstantMerge.h"
47
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
48
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
49
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
50
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
51
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
52
#include "llvm/Transforms/IPO/FunctionAttrs.h"
53
#include "llvm/Transforms/IPO/GlobalDCE.h"
54
#include "llvm/Transforms/IPO/GlobalOpt.h"
55
#include "llvm/Transforms/IPO/GlobalSplit.h"
56
#include "llvm/Transforms/IPO/HotColdSplitting.h"
57
#include "llvm/Transforms/IPO/IROutliner.h"
58
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
59
#include "llvm/Transforms/IPO/Inliner.h"
60
#include "llvm/Transforms/IPO/LowerTypeTests.h"
61
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
62
#include "llvm/Transforms/IPO/MergeFunctions.h"
63
#include "llvm/Transforms/IPO/ModuleInliner.h"
64
#include "llvm/Transforms/IPO/OpenMPOpt.h"
65
#include "llvm/Transforms/IPO/PartialInlining.h"
66
#include "llvm/Transforms/IPO/SCCP.h"
67
#include "llvm/Transforms/IPO/SampleProfile.h"
68
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
69
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
70
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
71
#include "llvm/Transforms/InstCombine/InstCombine.h"
72
#include "llvm/Transforms/Instrumentation/CGProfile.h"
73
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
74
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
75
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
76
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
77
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
78
#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
79
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
80
#include "llvm/Transforms/Scalar/ADCE.h"
81
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
82
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
83
#include "llvm/Transforms/Scalar/BDCE.h"
84
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
85
#include "llvm/Transforms/Scalar/ConstraintElimination.h"
86
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
87
#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
88
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
89
#include "llvm/Transforms/Scalar/DivRemPairs.h"
90
#include "llvm/Transforms/Scalar/EarlyCSE.h"
91
#include "llvm/Transforms/Scalar/Float2Int.h"
92
#include "llvm/Transforms/Scalar/GVN.h"
93
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
94
#include "llvm/Transforms/Scalar/InferAlignment.h"
95
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
96
#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
97
#include "llvm/Transforms/Scalar/JumpThreading.h"
98
#include "llvm/Transforms/Scalar/LICM.h"
99
#include "llvm/Transforms/Scalar/LoopDeletion.h"
100
#include "llvm/Transforms/Scalar/LoopDistribute.h"
101
#include "llvm/Transforms/Scalar/LoopFlatten.h"
102
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
103
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
104
#include "llvm/Transforms/Scalar/LoopInterchange.h"
105
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
106
#include "llvm/Transforms/Scalar/LoopPassManager.h"
107
#include "llvm/Transforms/Scalar/LoopRotation.h"
108
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
109
#include "llvm/Transforms/Scalar/LoopSink.h"
110
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
111
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
112
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
113
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
114
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
115
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
116
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
117
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
118
#include "llvm/Transforms/Scalar/NewGVN.h"
119
#include "llvm/Transforms/Scalar/Reassociate.h"
120
#include "llvm/Transforms/Scalar/SCCP.h"
121
#include "llvm/Transforms/Scalar/SROA.h"
122
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
123
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
124
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
125
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
126
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
127
#include "llvm/Transforms/Utils/AddDiscriminators.h"
128
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
129
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
130
#include "llvm/Transforms/Utils/CountVisits.h"
131
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
132
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
133
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
134
#include "llvm/Transforms/Utils/Mem2Reg.h"
135
#include "llvm/Transforms/Utils/MoveAutoInit.h"
136
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
137
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
138
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
139
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
140
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
141
#include "llvm/Transforms/Vectorize/VectorCombine.h"
142

143
using namespace llvm;
144

145
//===----------------------------------------------------------------------===//
// Command-line switches that tune how the default pipelines are assembled.
// All of them are file-local; most are cl::Hidden.
//===----------------------------------------------------------------------===//

/// Selects which inline advisor implementation is requested.
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
    "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
    cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
    cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
                          "Heuristics-based inliner version"),
               clEnumValN(InliningAdvisorMode::Development, "development",
                          "Use development mode (runtime-loadable model)"),
               clEnumValN(InliningAdvisorMode::Release, "release",
                          "Use release mode (AOT-compiled model)")));

static cl::opt<bool> EnableSyntheticCounts(
    "enable-npm-synthetic-counts", cl::Hidden,
    cl::desc("Run synthetic function entry count generation "
             "pass"));

/// Flag to enable inline deferral during PGO.
static cl::opt<bool>
    EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
                            cl::Hidden,
                            cl::desc("Enable inline deferral during PGO"));

static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
                                         cl::init(false), cl::Hidden,
                                         cl::desc("Enable module inliner"));

static cl::opt<bool> PerformMandatoryInliningsFirst(
    "mandatory-inlining-first", cl::init(false), cl::Hidden,
    cl::desc("Perform mandatory inlinings module-wide, before performing "
             "inlining"));

// Copied into PipelineTuningOptions::EagerlyInvalidateAnalyses by the
// PipelineTuningOptions constructor.
static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
    "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
    cl::desc("Eagerly invalidate more analyses in default pipelines"));

// Copied into PipelineTuningOptions::MergeFunctions by the
// PipelineTuningOptions constructor.
static cl::opt<bool> EnableMergeFunctions(
    "enable-merge-functions", cl::init(false), cl::Hidden,
    cl::desc("Enable function merging as part of the optimization pipeline"));

static cl::opt<bool> EnablePostPGOLoopRotation(
    "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
    cl::desc("Run the loop rotation transformation after PGO instrumentation"));

static cl::opt<bool> EnableGlobalAnalyses(
    "enable-global-analyses", cl::init(true), cl::Hidden,
    cl::desc("Enable inter-procedural analyses"));

static cl::opt<bool>
    RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
                       cl::desc("Run Partial inlining pass"));

static cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization"));

// Chooses NewGVNPass over GVNPass in the function simplification pipeline.
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                               cl::desc("Run the NewGVN pass"));

// When set, LoopInterchangePass is appended to the second loop pass manager
// of the function simplification pipelines.
static cl::opt<bool> EnableLoopInterchange(
    "enable-loopinterchange", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental LoopInterchange Pass"));

static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
                                        cl::init(false), cl::Hidden,
                                        cl::desc("Enable Unroll And Jam Pass"));

// When set, LoopFlattenPass is appended to the first loop pass manager of the
// function simplification pipelines.
static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                       cl::Hidden,
                                       cl::desc("Enable the LoopFlatten Pass"));

// Experimentally allow loop header duplication. This should allow for better
// optimization at Oz, since loop-idiom recognition can then recognize things
// like memcpy. If this ends up being useful for many targets, we should drop
// this flag and make a code generation option that can be controlled
// independent of the opt level and exposed through the frontend.
static cl::opt<bool> EnableLoopHeaderDuplication(
    "enable-loop-header-duplication", cl::init(false), cl::Hidden,
    cl::desc("Enable loop header duplication at any optimization level"));

static cl::opt<bool>
    EnableDFAJumpThreading("enable-dfa-jump-thread",
                           cl::desc("Enable DFA jump threading"),
                           cl::init(false), cl::Hidden);

// TODO: turn on and remove flag
static cl::opt<bool> EnablePGOForceFunctionAttrs(
    "enable-pgo-force-function-attrs",
    cl::desc("Enable pass to set function attributes based on PGO profiles"),
    cl::init(false));

static cl::opt<bool>
    EnableHotColdSplit("hot-cold-split",
                       cl::desc("Enable hot-cold splitting pass"));

static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
                                      cl::Hidden,
                                      cl::desc("Enable ir outliner pass"));

// When set, addPreInlinerPasses() returns without adding any pass.
static cl::opt<bool>
    DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
                      cl::desc("Disable pre-instrumentation inliner"));

// Used by addPreInlinerPasses() as the inliner's default threshold, and as
// its hint threshold when optimizing for size.
static cl::opt<int> PreInlineThreshold(
    "preinline-threshold", cl::Hidden, cl::init(75),
    cl::desc("Control the amount of inlining in pre-instrumentation inliner "
             "(default = 75)"));

static cl::opt<bool>
    EnableGVNHoist("enable-gvn-hoist",
                   cl::desc("Enable the GVN hoisting pass (default = off)"));

static cl::opt<bool>
    EnableGVNSink("enable-gvn-sink",
                  cl::desc("Enable the GVN sinking pass (default = off)"));

static cl::opt<bool> EnableJumpTableToSwitch(
    "enable-jump-table-to-switch",
    cl::desc("Enable JumpTableToSwitch pass (default = off)"));

// This option is used in simplifying testing SampleFDO optimizations for
// profile loading.
// NOTE(review): how this remark relates to CHR is not evident from this file
// section -- confirm the comment is attached to the right option.
static cl::opt<bool>
    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
              cl::desc("Enable control height reduction optimization (CHR)"));

static cl::opt<bool> FlattenedProfileUsed(
    "flattened-profile-used", cl::init(false), cl::Hidden,
    cl::desc("Indicate the sample profile being used is flattened, i.e., "
             "no inline hierarchy exists in the profile"));

static cl::opt<bool> EnableOrderFileInstrumentation(
    "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable order file instrumentation (default = off)"));

static cl::opt<bool>
    EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
                 cl::desc("Enable lowering of the matrix intrinsics"));

// Gates ConstraintEliminationPass in the function simplification pipeline.
static cl::opt<bool> EnableConstraintElimination(
    "enable-constraint-elimination", cl::init(true), cl::Hidden,
    cl::desc(
        "Enable pass to eliminate conditions based on linear constraints"));

static cl::opt<AttributorRunOption> AttributorRun(
    "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
    cl::desc("Enable the attributor inter-procedural deduction pass"),
    cl::values(clEnumValN(AttributorRunOption::ALL, "all",
                          "enable all attributor runs"),
               clEnumValN(AttributorRunOption::MODULE, "module",
                          "enable module-wide attributor runs"),
               clEnumValN(AttributorRunOption::CGSCC, "cgscc",
                          "enable call graph SCC attributor runs"),
               clEnumValN(AttributorRunOption::NONE, "none",
                          "disable attributor runs")));

static cl::opt<bool> EnableSampledInstr(
    "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable profile instrumentation sampling (default = off)"));

static cl::opt<bool> UseLoopVersioningLICM(
    "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental Loop Versioning LICM pass"));

// Options that are only declared here; their definitions are not part of this
// translation unit section.
namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;

extern cl::opt<bool> EnableInferAlignmentPass;
} // namespace llvm
311

312
/// Gives every tuning knob its initial value. Some knobs are fixed constants;
/// the rest are read from command-line options at construction time.
PipelineTuningOptions::PipelineTuningOptions() {
  // Vectorization and unrolling switches.
  LoopVectorization = true;
  LoopInterleaving = true;
  SLPVectorization = false;
  LoopUnrolling = true;

  // Miscellaneous fixed defaults.
  CallGraphProfile = true;
  UnifiedLTO = false;
  InlinerThreshold = -1;

  // Values taken from options defined in this file.
  MergeFunctions = EnableMergeFunctions;
  EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;

  // Values taken from names not defined in this file section (presumably
  // options exported by the included pass headers -- confirm).
  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
  LicmMssaOptCap = SetLicmMssaOptCap;
  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
}
326

327
namespace llvm {
328
extern cl::opt<unsigned> MaxDevirtIterations;
329
} // namespace llvm
330

331
void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
332
                                            OptimizationLevel Level) {
333
  for (auto &C : PeepholeEPCallbacks)
334
    C(FPM, Level);
335
}
336
void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
337
    LoopPassManager &LPM, OptimizationLevel Level) {
338
  for (auto &C : LateLoopOptimizationsEPCallbacks)
339
    C(LPM, Level);
340
}
341
void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
342
                                                    OptimizationLevel Level) {
343
  for (auto &C : LoopOptimizerEndEPCallbacks)
344
    C(LPM, Level);
345
}
346
void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
347
    FunctionPassManager &FPM, OptimizationLevel Level) {
348
  for (auto &C : ScalarOptimizerLateEPCallbacks)
349
    C(FPM, Level);
350
}
351
void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
352
                                                      OptimizationLevel Level) {
353
  for (auto &C : CGSCCOptimizerLateEPCallbacks)
354
    C(CGPM, Level);
355
}
356
void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
357
                                                   OptimizationLevel Level) {
358
  for (auto &C : VectorizerStartEPCallbacks)
359
    C(FPM, Level);
360
}
361
void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
362
                                                  OptimizationLevel Level) {
363
  for (auto &C : OptimizerEarlyEPCallbacks)
364
    C(MPM, Level);
365
}
366
void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
367
                                                 OptimizationLevel Level) {
368
  for (auto &C : OptimizerLastEPCallbacks)
369
    C(MPM, Level);
370
}
371
void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
372
    ModulePassManager &MPM, OptimizationLevel Level) {
373
  for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
374
    C(MPM, Level);
375
}
376
void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
377
    ModulePassManager &MPM, OptimizationLevel Level) {
378
  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
379
    C(MPM, Level);
380
}
381
void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
382
                                                 OptimizationLevel Level) {
383
  for (auto &C : PipelineStartEPCallbacks)
384
    C(MPM, Level);
385
}
386
void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
387
    ModulePassManager &MPM, OptimizationLevel Level) {
388
  for (auto &C : PipelineEarlySimplificationEPCallbacks)
389
    C(MPM, Level);
390
}
391

392
// Helper to add AnnotationRemarksPass.
393
static void addAnnotationRemarksPass(ModulePassManager &MPM) {
394
  MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
395
}
396

397
// Helper to check if the current compilation phase is preparing for LTO
398
static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
399
  return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
400
         Phase == ThinOrFullLTOPhase::FullLTOPreLink;
401
}
402

403
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
404
FunctionPassManager
405
PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
406
                                                   ThinOrFullLTOPhase Phase) {
407

408
  FunctionPassManager FPM;
409

410
  if (AreStatisticsEnabled())
411
    FPM.addPass(CountVisitsPass());
412

413
  // Form SSA out of local memory accesses after breaking apart aggregates into
414
  // scalars.
415
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
416

417
  // Catch trivial redundancies
418
  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
419

420
  // Hoisting of scalars and load expressions.
421
  FPM.addPass(
422
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
423
  FPM.addPass(InstCombinePass());
424

425
  FPM.addPass(LibCallsShrinkWrapPass());
426

427
  invokePeepholeEPCallbacks(FPM, Level);
428

429
  FPM.addPass(
430
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
431

432
  // Form canonically associated expression trees, and simplify the trees using
433
  // basic mathematical properties. For example, this will form (nearly)
434
  // minimal multiplication trees.
435
  FPM.addPass(ReassociatePass());
436

437
  // Add the primary loop simplification pipeline.
438
  // FIXME: Currently this is split into two loop pass pipelines because we run
439
  // some function passes in between them. These can and should be removed
440
  // and/or replaced by scheduling the loop pass equivalents in the correct
441
  // positions. But those equivalent passes aren't powerful enough yet.
442
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
443
  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
444
  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
445
  // `LoopInstSimplify`.
446
  LoopPassManager LPM1, LPM2;
447

448
  // Simplify the loop body. We do this initially to clean up after other loop
449
  // passes run, either when iterating on a loop or on inner loops with
450
  // implications on the outer loop.
451
  LPM1.addPass(LoopInstSimplifyPass());
452
  LPM1.addPass(LoopSimplifyCFGPass());
453

454
  // Try to remove as much code from the loop header as possible,
455
  // to reduce amount of IR that will have to be duplicated. However,
456
  // do not perform speculative hoisting the first time as LICM
457
  // will destroy metadata that may not need to be destroyed if run
458
  // after loop rotation.
459
  // TODO: Investigate promotion cap for O1.
460
  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
461
                        /*AllowSpeculation=*/false));
462

463
  LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
464
                              isLTOPreLink(Phase)));
465
  // TODO: Investigate promotion cap for O1.
466
  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
467
                        /*AllowSpeculation=*/true));
468
  LPM1.addPass(SimpleLoopUnswitchPass());
469
  if (EnableLoopFlatten)
470
    LPM1.addPass(LoopFlattenPass());
471

472
  LPM2.addPass(LoopIdiomRecognizePass());
473
  LPM2.addPass(IndVarSimplifyPass());
474

475
  invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
476

477
  LPM2.addPass(LoopDeletionPass());
478

479
  if (EnableLoopInterchange)
480
    LPM2.addPass(LoopInterchangePass());
481

482
  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
483
  // because it changes IR to makes profile annotation in back compile
484
  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
485
  // attributes so we need to make sure and allow the full unroll pass to pay
486
  // attention to it.
487
  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
488
      PGOOpt->Action != PGOOptions::SampleUse)
489
    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
490
                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
491
                                    PTO.ForgetAllSCEVInLoopUnroll));
492

493
  invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
494

495
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
496
                                              /*UseMemorySSA=*/true,
497
                                              /*UseBlockFrequencyInfo=*/true));
498
  FPM.addPass(
499
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
500
  FPM.addPass(InstCombinePass());
501
  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
502
  // *All* loop passes must preserve it, in order to be able to use it.
503
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
504
                                              /*UseMemorySSA=*/false,
505
                                              /*UseBlockFrequencyInfo=*/false));
506

507
  // Delete small array after loop unroll.
508
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
509

510
  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
511
  FPM.addPass(MemCpyOptPass());
512

513
  // Sparse conditional constant propagation.
514
  // FIXME: It isn't clear why we do this *after* loop passes rather than
515
  // before...
516
  FPM.addPass(SCCPPass());
517

518
  // Delete dead bit computations (instcombine runs after to fold away the dead
519
  // computations, and then ADCE will run later to exploit any new DCE
520
  // opportunities that creates).
521
  FPM.addPass(BDCEPass());
522

523
  // Run instcombine after redundancy and dead bit elimination to exploit
524
  // opportunities opened up by them.
525
  FPM.addPass(InstCombinePass());
526
  invokePeepholeEPCallbacks(FPM, Level);
527

528
  FPM.addPass(CoroElidePass());
529

530
  invokeScalarOptimizerLateEPCallbacks(FPM, Level);
531

532
  // Finally, do an expensive DCE pass to catch all the dead code exposed by
533
  // the simplifications and basic cleanup after all the simplifications.
534
  // TODO: Investigate if this is too expensive.
535
  FPM.addPass(ADCEPass());
536
  FPM.addPass(
537
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
538
  FPM.addPass(InstCombinePass());
539
  invokePeepholeEPCallbacks(FPM, Level);
540

541
  return FPM;
542
}
543

544
/// Assembles the main function simplification pipeline. A speedup level of 1
/// is delegated to buildO1FunctionSimplificationPipeline(); O0 is rejected by
/// assertion. The order in which passes are appended below is the order in
/// which they are scheduled.
///
/// \param Level optimization level; selects optional passes and is forwarded
///              to the extension-point callbacks.
/// \param Phase LTO phase; affects loop rotation and whether full unrolling
///              is scheduled.
/// \returns the populated function pass manager.
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
                                                 ThinOrFullLTOPhase Phase) {
  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");

  // The O1 pipeline has a separate pipeline creation function to simplify
  // construction readability.
  if (Level.getSpeedupLevel() == 1)
    return buildO1FunctionSimplificationPipeline(Level, Phase);

  FunctionPassManager FPM;

  // All but the last SimplifyCFG run share this configuration.
  auto AddSimplifyCFG = [&FPM] {
    FPM.addPass(
        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  };
  // The LICM runs differ only in whether speculation is allowed.
  auto MakeLICM = [&](bool AllowSpeculation) {
    return LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                    AllowSpeculation);
  };
  // Non-trivial unswitching is requested only at O3.
  const bool UnswitchNonTrivial = Level == OptimizationLevel::O3;

  if (AreStatisticsEnabled())
    FPM.addPass(CountVisitsPass());

  // Break aggregates apart into scalars, then form SSA out of the resulting
  // local memory accesses.
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Catch trivial redundancies.
  FPM.addPass(EarlyCSEPass(/*UseMemorySSA=*/true));
  if (EnableKnowledgeRetention)
    FPM.addPass(AssumeSimplifyPass());

  // Hoisting of scalars and load expressions.
  if (EnableGVNHoist)
    FPM.addPass(GVNHoistPass());

  // Global value numbering based sinking, followed by a CFG cleanup.
  if (EnableGVNSink) {
    FPM.addPass(GVNSinkPass());
    AddSimplifyCFG();
  }

  // Speculative execution if the target has divergent branches; otherwise nop.
  FPM.addPass(SpeculativeExecutionPass(/*OnlyIfDivergentTarget=*/true));

  // Optimize based on known information about branches, and cleanup afterward.
  FPM.addPass(JumpThreadingPass());
  FPM.addPass(CorrelatedValuePropagationPass());

  // Jump table to switch conversion.
  if (EnableJumpTableToSwitch)
    FPM.addPass(JumpTableToSwitchPass());

  AddSimplifyCFG();
  FPM.addPass(InstCombinePass());
  FPM.addPass(AggressiveInstCombinePass());

  if (!Level.isOptimizingForSize())
    FPM.addPass(LibCallsShrinkWrapPass());

  invokePeepholeEPCallbacks(FPM, Level);

  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
  // using the size value profile. Don't perform this when optimizing for size.
  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
      !Level.isOptimizingForSize())
    FPM.addPass(PGOMemOPSizeOpt());

  FPM.addPass(TailCallElimPass());
  AddSimplifyCFG();

  // Form canonically associated expression trees, and simplify the trees using
  // basic mathematical properties. For example, this will form (nearly)
  // minimal multiplication trees.
  FPM.addPass(ReassociatePass());

  if (EnableConstraintElimination)
    FPM.addPass(ConstraintEliminationPass());

  // The primary loop simplification pipeline.
  // FIXME: This is currently split into two loop pass pipelines because some
  // function passes run in between them. Those should be removed and/or
  // replaced by scheduling their loop pass equivalents in the right places,
  // but the equivalents are not powerful enough yet: `SimplifyCFGPass` and
  // `InstCombinePass` are still used, `LoopSimplifyCFGPass` cannot yet fully
  // replace the former, and `LoopInstSimplify` is only the closest match for
  // the latter.
  LoopPassManager LPM1, LPM2;

  // Simplify the loop body first, to clean up after other loop passes have
  // run -- either while iterating on a loop or on inner loops with
  // implications on the outer loop.
  LPM1.addPass(LoopInstSimplifyPass());
  LPM1.addPass(LoopSimplifyCFGPass());

  // Remove as much code from the loop header as possible, to reduce the
  // amount of IR that will have to be duplicated. Do not hoist speculatively
  // on this first run, though: LICM would destroy metadata that may not need
  // to be destroyed if it runs after loop rotation.
  // TODO: Investigate promotion cap for O1.
  LPM1.addPass(MakeLICM(/*AllowSpeculation=*/false));

  // Header duplication in loop rotation is disabled at -Oz unless it has been
  // explicitly requested on the command line.
  const bool DuplicateLoopHeaders =
      EnableLoopHeaderDuplication || Level != OptimizationLevel::Oz;
  LPM1.addPass(LoopRotatePass(DuplicateLoopHeaders, isLTOPreLink(Phase)));
  // TODO: Investigate promotion cap for O1.
  LPM1.addPass(MakeLICM(/*AllowSpeculation=*/true));
  LPM1.addPass(SimpleLoopUnswitchPass(/*NonTrivial=*/UnswitchNonTrivial));
  if (EnableLoopFlatten)
    LPM1.addPass(LoopFlattenPass());

  LPM2.addPass(LoopIdiomRecognizePass());
  LPM2.addPass(IndVarSimplifyPass());

  {
    // A nested pass manager holding one more unswitching run.
    ExtraSimpleLoopUnswitchPassManager ExtraPasses;
    ExtraPasses.addPass(
        SimpleLoopUnswitchPass(/*NonTrivial=*/UnswitchNonTrivial));
    LPM2.addPass(std::move(ExtraPasses));
  }

  invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);

  LPM2.addPass(LoopDeletionPass());

  if (EnableLoopInterchange)
    LPM2.addPass(LoopInterchangePass());

  // Do not unroll in the ThinLTO pre-link phase during sample PGO: unrolling
  // changes the IR in a way that makes profile annotation in the backend
  // compile inaccurate. The normal unroller doesn't pay attention to forced
  // full unroll attributes, so the full unroll pass must be allowed to honor
  // them.
  const bool IsSamplePGOThinLTOPreLink =
      Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
      PGOOpt->Action == PGOOptions::SampleUse;
  if (!IsSamplePGOThinLTOPreLink)
    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                    /*OnlyWhenForced=*/!PTO.LoopUnrolling,
                                    PTO.ForgetAllSCEVInLoopUnroll));

  invokeLoopOptimizerEndEPCallbacks(LPM2, Level);

  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                              /*UseMemorySSA=*/true,
                                              /*UseBlockFrequencyInfo=*/true));
  AddSimplifyCFG();
  FPM.addPass(InstCombinePass());
  // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
  // *All* loop passes must preserve it, in order to be able to use it.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
                                              /*UseMemorySSA=*/false,
                                              /*UseBlockFrequencyInfo=*/false));

  // Delete small arrays after loop unrolling.
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Try vectorization/scalarization transforms that are both improvements
  // themselves and can allow further folds with GVN and InstCombine.
  FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));

  // Eliminate redundancies.
  FPM.addPass(MergedLoadStoreMotionPass());
  if (RunNewGVN)
    FPM.addPass(NewGVNPass());
  else
    FPM.addPass(GVNPass());

  // Sparse conditional constant propagation.
  // FIXME: It isn't clear why we do this *after* loop passes rather than
  // before...
  FPM.addPass(SCCPPass());

  // Delete dead bit computations (instcombine runs after to fold away the dead
  // computations, and then ADCE will run later to exploit any new DCE
  // opportunities that creates).
  FPM.addPass(BDCEPass());

  // Run instcombine after redundancy and dead bit elimination to exploit
  // opportunities opened up by them.
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  // Re-consider control flow based optimizations after redundancy elimination,
  // redo DCE, etc.
  if (EnableDFAJumpThreading)
    FPM.addPass(DFAJumpThreadingPass());

  FPM.addPass(JumpThreadingPass());
  FPM.addPass(CorrelatedValuePropagationPass());

  // Finally, do an expensive DCE pass to catch all the dead code exposed by
  // the simplifications and basic cleanup after all the simplifications.
  // TODO: Investigate if this is too expensive.
  FPM.addPass(ADCEPass());

  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
  FPM.addPass(MemCpyOptPass());

  FPM.addPass(DSEPass());
  FPM.addPass(MoveAutoInitPass());

  // One more speculative LICM run, this time without block frequency info.
  FPM.addPass(createFunctionToLoopPassAdaptor(
      MakeLICM(/*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));

  FPM.addPass(CoroElidePass());

  invokeScalarOptimizerLateEPCallbacks(FPM, Level);

  // The final CFG cleanup additionally hoists and sinks common instructions.
  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                  .convertSwitchRangeToICmp(true)
                                  .hoistCommonInsts(true)
                                  .sinkCommonInsts(true)));
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  return FPM;
}
765

766
/// Append the passes required by the LTO pre-link pipelines to \p MPM.
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  // Alias canonicalization is scheduled first, followed by the pass that
  // names anonymous globals.
  MPM.addPass(CanonicalizeAliasesPass());
  MPM.addPass(NameAnonGlobalPass());
}
770

771
void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
772
                                      OptimizationLevel Level,
773
                                      ThinOrFullLTOPhase LTOPhase) {
774
  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
775
  if (DisablePreInliner)
776
    return;
777
  InlineParams IP;
778

779
  IP.DefaultThreshold = PreInlineThreshold;
780

781
  // FIXME: The hint threshold has the same value used by the regular inliner
782
  // when not optimzing for size. This should probably be lowered after
783
  // performance testing.
784
  // FIXME: this comment is cargo culted from the old pass manager, revisit).
785
  IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
786
  ModuleInlinerWrapperPass MIWP(
787
      IP, /* MandatoryFirst */ true,
788
      InlineContext{LTOPhase, InlinePass::EarlyInliner});
789
  CGSCCPassManager &CGPipeline = MIWP.getPM();
790

791
  FunctionPassManager FPM;
792
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
793
  FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
794
  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
795
      true)));                    // Merge & remove basic blocks.
796
  FPM.addPass(InstCombinePass()); // Combine silly sequences.
797
  invokePeepholeEPCallbacks(FPM, Level);
798

799
  CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
800
      std::move(FPM), PTO.EagerlyInvalidateAnalyses));
801

802
  MPM.addPass(std::move(MIWP));
803

804
  // Delete anything that is now dead to make sure that we don't instrument
805
  // dead code. Instrumentation can end up keeping dead code around and
806
  // dramatically increase code size.
807
  MPM.addPass(GlobalDCEPass());
808
}
809

810
void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
811
                                         OptimizationLevel Level) {
812
  if (EnablePostPGOLoopRotation) {
813
    // Disable header duplication in loop rotation at -Oz.
814
    MPM.addPass(createModuleToFunctionPassAdaptor(
815
        createFunctionToLoopPassAdaptor(
816
            LoopRotatePass(EnableLoopHeaderDuplication ||
817
                           Level != OptimizationLevel::Oz),
818
            /*UseMemorySSA=*/false,
819
            /*UseBlockFrequencyInfo=*/false),
820
        PTO.EagerlyInvalidateAnalyses));
821
  }
822
}
823

824
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
825
                                    OptimizationLevel Level, bool RunProfileGen,
826
                                    bool IsCS, bool AtomicCounterUpdate,
827
                                    std::string ProfileFile,
828
                                    std::string ProfileRemappingFile,
829
                                    IntrusiveRefCntPtr<vfs::FileSystem> FS) {
830
  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
831

832
  if (!RunProfileGen) {
833
    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
834
    MPM.addPass(
835
        PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
836
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
837
    // RequireAnalysisPass for PSI before subsequent non-module passes.
838
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
839
    return;
840
  }
841

842
  // Perform PGO instrumentation.
843
  MPM.addPass(PGOInstrumentationGen(IsCS));
844

845
  addPostPGOLoopRotation(MPM, Level);
846
  // Add the profile lowering pass.
847
  InstrProfOptions Options;
848
  if (!ProfileFile.empty())
849
    Options.InstrProfileOutput = ProfileFile;
850
  // Do counter promotion at Level greater than O0.
851
  Options.DoCounterPromotion = true;
852
  Options.UseBFIInPromotion = IsCS;
853
  if (EnableSampledInstr) {
854
    Options.Sampling = true;
855
    // With sampling, there is little beneifit to enable counter promotion.
856
    // But note that sampling does work with counter promotion.
857
    Options.DoCounterPromotion = false;
858
  }
859
  Options.Atomic = AtomicCounterUpdate;
860
  MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
861
}
862

863
void PassBuilder::addPGOInstrPassesForO0(
864
    ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
865
    bool AtomicCounterUpdate, std::string ProfileFile,
866
    std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
867
  if (!RunProfileGen) {
868
    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
869
    MPM.addPass(
870
        PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
871
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
872
    // RequireAnalysisPass for PSI before subsequent non-module passes.
873
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
874
    return;
875
  }
876

877
  // Perform PGO instrumentation.
878
  MPM.addPass(PGOInstrumentationGen(IsCS));
879
  // Add the profile lowering pass.
880
  InstrProfOptions Options;
881
  if (!ProfileFile.empty())
882
    Options.InstrProfileOutput = ProfileFile;
883
  // Do not do counter promotion at O0.
884
  Options.DoCounterPromotion = false;
885
  Options.UseBFIInPromotion = IsCS;
886
  Options.Atomic = AtomicCounterUpdate;
887
  MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
888
}
889

890
/// Build the inliner parameters from the speed and size components of
/// \p Level.
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
  const unsigned SpeedupLevel = Level.getSpeedupLevel();
  const unsigned SizeLevel = Level.getSizeLevel();
  return getInlineParams(SpeedupLevel, SizeLevel);
}
893

894
/// Build the CGSCC inliner pipeline: the inliner wrapper together with the
/// attribute deduction, function simplification and coroutine splitting that
/// run nested inside its call-graph walk.
///
/// \param Level Optimization level used to pick inline parameters and passes.
/// \param Phase LTO phase this pipeline is being built for.
/// \returns The fully populated inliner wrapper pass.
ModuleInlinerWrapperPass
PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
                                  ThinOrFullLTOPhase Phase) {
  // A threshold of -1 in the tuning options selects parameters derived from
  // the optimization level; any other value is used as the threshold.
  InlineParams IP = PTO.InlinerThreshold == -1
                        ? getInlineParamsFromOptLevel(Level)
                        : getInlineParams(PTO.InlinerThreshold);

  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
  // disable hot callsite inline (as much as possible [1]) because it makes
  // profile annotation in the backend inaccurate.
  //
  // [1] Note the cost of a function could be below zero due to erased
  // prologue / epilogue.
  const bool IsSamplePGOThinPreLink =
      Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
      PGOOpt->Action == PGOOptions::SampleUse;
  if (IsSamplePGOThinPreLink)
    IP.HotCallSiteThreshold = 0;

  if (PGOOpt)
    IP.EnableDeferral = EnablePGOInlineDeferral;

  ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
                                InlineContext{Phase, InlinePass::CGSCCInliner},
                                UseInlineAdvisor, MaxDevirtIterations);

  if (EnableGlobalAnalyses) {
    // Require the GlobalsAA analysis for the module so we can query it within
    // the CGSCC pipeline.
    MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
    // Invalidate AAManager so it can be recreated and pick up the newly
    // available GlobalsAA.
    MIWP.addModulePass(
        createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
  }

  // Require the ProfileSummaryAnalysis for the module so we can query it within
  // the inliner pass.
  MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());

  // Now begin the main postorder CGSCC pipeline.
  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
  // manager and trying to emulate its precise behavior. Much of this doesn't
  // make a lot of sense and we should revisit the core CGSCC structure.
  CGSCCPassManager &CGPM = MIWP.getPM();

  // Note: historically, the PruneEH pass was run first to deduce nounwind and
  // generally clean up exception handling overhead. It isn't clear this is
  // valuable as the inliner doesn't currently care whether it is inlining an
  // invoke or a call.

  if (AttributorRun & AttributorRunOption::CGSCC)
    CGPM.addPass(AttributorCGSCCPass());

  // Deduce function attributes. We do another run of this after the function
  // simplification pipeline, so this only needs to run when it could affect the
  // function simplification pipeline, which is only the case with recursive
  // functions.
  CGPM.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));

  // When at O3 add argument promotion to the pass pipeline.
  // FIXME: It isn't at all clear why this should be limited to O3.
  if (Level == OptimizationLevel::O3)
    CGPM.addPass(ArgumentPromotionPass());

  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
  // there are no OpenMP runtime calls present in the module.
  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
    CGPM.addPass(OpenMPOptCGSCCPass());

  invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);

  // Add the core function simplification pipeline nested inside the
  // CGSCC walk.
  CGPM.addPass(createCGSCCToFunctionPassAdaptor(
      buildFunctionSimplificationPipeline(Level, Phase),
      PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));

  // Finally, deduce any function attributes based on the fully simplified
  // function.
  CGPM.addPass(PostOrderFunctionAttrsPass());

  // Mark that the function is fully simplified and that it shouldn't be
  // simplified again if we somehow revisit it due to CGSCC mutations unless
  // it's been modified since.
  CGPM.addPass(createCGSCCToFunctionPassAdaptor(
      RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));

  CGPM.addPass(CoroSplitPass(Level != OptimizationLevel::O0));

  // Make sure we don't affect potential future NoRerun CGSCC adaptors.
  MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
      InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));

  return MIWP;
}
989

990
/// Build the module-inliner variant of the inlining pipeline: the module
/// inliner, then the function simplification pipeline over every function,
/// then coroutine splitting.
///
/// \param Level Optimization level used to derive the inline parameters.
/// \param Phase LTO phase this pipeline is being built for.
/// \returns A module pass manager holding the pipeline.
ModulePassManager
PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
                                        ThinOrFullLTOPhase Phase) {
  ModulePassManager MPM;

  InlineParams IP = getInlineParamsFromOptLevel(Level);
  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
  // disable hot callsite inline (as much as possible [1]) because it makes
  // profile annotation in the backend inaccurate.
  //
  // [1] Note the cost of a function could be below zero due to erased
  // prologue / epilogue.
  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
      PGOOpt->Action == PGOOptions::SampleUse)
    IP.HotCallSiteThreshold = 0;

  // The inline deferral logic is used to avoid losing some
  // inlining chance in future. It is helpful in SCC inliner, in which
  // inlining is processed in bottom-up order.
  // While in module inliner, the inlining order is a priority-based order
  // by default. The inline deferral is unnecessary there. So we disable the
  // inline deferral logic in module inliner.
  //
  // Deferral is therefore switched off unconditionally, including for PGO
  // builds; a PGO-specific setting made before this point would simply be
  // overwritten, so none is made.
  IP.EnableDeferral = false;

  MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));

  MPM.addPass(createModuleToFunctionPassAdaptor(
      buildFunctionSimplificationPipeline(Level, Phase),
      PTO.EagerlyInvalidateAnalyses));

  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
      CoroSplitPass(Level != OptimizationLevel::O0)));

  return MPM;
}
1028

1029
/// Build the module-level simplification pipeline: early frontend cleanup,
/// profile loading / instrumentation, interprocedural constant propagation,
/// global optimization and the inliner pipeline.
///
/// \param Level Optimization level; must not be O0.
/// \param Phase LTO phase; must not be FullLTOPostLink.
/// \returns A module pass manager holding the pipeline.
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
                                               ThinOrFullLTOPhase Phase) {
  assert(Level != OptimizationLevel::O0 &&
         "Should not be used for O0 pipeline");

  assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
         "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");

  ModulePassManager MPM;

  // Several decisions below hinge on whether this is the ThinLTO backend.
  const bool IsThinPostLink = Phase == ThinOrFullLTOPhase::ThinLTOPostLink;

  // Place pseudo probe instrumentation as the first pass of the pipeline to
  // minimize the impact of optimization changes.
  if (PGOOpt && PGOOpt->PseudoProbeForProfiling && !IsThinPostLink)
    MPM.addPass(SampleProfileProbePass(TM));

  const bool HasSampleProfile =
      PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);

  // In ThinLTO mode, when flattened profile is used, all the available
  // profile information will be annotated in PreLink phase so there is
  // no need to load the profile again in PostLink.
  const bool LoadSampleProfile =
      HasSampleProfile && !(FlattenedProfileUsed && IsThinPostLink);

  // During the ThinLTO backend phase we perform early indirect call promotion
  // here, before globalopt. Otherwise imported available_externally functions
  // look unreferenced and are removed. If we are going to load the sample
  // profile then defer until later.
  // TODO: See if we can move later and consolidate with the location where
  // we perform ICP when we are loading a sample profile.
  // TODO: We pass HasSampleProfile (whether there was a sample profile file
  // passed to the compile) to the SamplePGO flag of ICP. This is used to
  // determine whether the new direct calls are annotated with prof metadata.
  // Ideally this should be determined from whether the IR is annotated with
  // sample profile, and not whether a sample profile was provided on the
  // command line. E.g. for flattened profiles where we will not be reloading
  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
  // provide the sample profile file.
  if (IsThinPostLink && !LoadSampleProfile)
    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));

  // Create an early function pass manager to cleanup the output of the
  // frontend. Not necessary with LTO post link pipelines since the pre link
  // pipeline already cleaned up the frontend output.
  if (!IsThinPostLink) {
    // Do basic inference of function attributes from known properties of system
    // libraries and other oracles.
    MPM.addPass(InferFunctionAttrsPass());
    MPM.addPass(CoroEarlyPass());

    FunctionPassManager FrontendCleanupFPM;
    FrontendCleanupFPM.addPass(
        EntryExitInstrumenterPass(/*PostInlining=*/false));
    // Lower llvm.expect to metadata before attempting transforms.
    // Compare/branch metadata may alter the behavior of passes like
    // SimplifyCFG.
    FrontendCleanupFPM.addPass(LowerExpectIntrinsicPass());
    FrontendCleanupFPM.addPass(SimplifyCFGPass());
    FrontendCleanupFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
    FrontendCleanupFPM.addPass(EarlyCSEPass());
    if (Level == OptimizationLevel::O3)
      FrontendCleanupFPM.addPass(CallSiteSplittingPass());
    MPM.addPass(createModuleToFunctionPassAdaptor(
        std::move(FrontendCleanupFPM), PTO.EagerlyInvalidateAnalyses));
  }

  if (LoadSampleProfile) {
    // Annotate sample profile right after early FPM to ensure freshness of
    // the debug info.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile, Phase));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
    // Do not invoke ICP in the LTOPrelink phase as it makes it hard
    // for the profile annotation to be accurate in the LTO backend.
    if (!isLTOPreLink(Phase)) {
      // We perform early indirect call promotion here, before globalopt.
      // This is important for the ThinLTO backend phase because otherwise
      // imported available_externally functions look unreferenced and are
      // removed.
      MPM.addPass(
          PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
    }
  }

  // Try to perform OpenMP specific optimizations on the module. This is a
  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
  MPM.addPass(OpenMPOptPass());

  if (AttributorRun & AttributorRunOption::MODULE)
    MPM.addPass(AttributorPass());

  // Lower type metadata and the type.test intrinsic in the ThinLTO
  // post link pipeline after ICP. This is to enable usage of the type
  // tests in ICP sequences.
  if (IsThinPostLink)
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

  invokePipelineEarlySimplificationEPCallbacks(MPM, Level);

  // Interprocedural constant propagation now that basic cleanup has occurred
  // and prior to optimizing globals.
  // FIXME: This position in the pipeline hasn't been carefully considered in
  // years, it should be re-analyzed.
  const bool AllowFuncSpec = Level != OptimizationLevel::Os &&
                             Level != OptimizationLevel::Oz &&
                             !isLTOPreLink(Phase);
  MPM.addPass(IPSCCPPass(IPSCCPOptions(AllowFuncSpec)));

  // Attach metadata to indirect call sites indicating the set of functions
  // they may target at run-time. This should follow IPSCCP.
  MPM.addPass(CalledValuePropagationPass());

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Create a small function pass pipeline to cleanup after all the global
  // optimizations.
  FunctionPassManager GlobalCleanupPM;
  // FIXME: Should this instead be a run of SROA?
  GlobalCleanupPM.addPass(PromotePass());
  GlobalCleanupPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
  GlobalCleanupPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // FullLTOPostLink was ruled out by the assertion at the top, so anything
  // that is not the ThinLTO backend is a pre-link (or non-LTO) compile.
  const bool IsPreLink = !IsThinPostLink;
  const bool IsPGOPreLink = PGOOpt && IsPreLink;
  const bool IsPGOInstrGen =
      IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
  const bool IsPGOInstrUse =
      IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
  const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
  // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
  // enable ctx profiling from the frontend.
  assert(
      !(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) &&
      "Enabling both instrumented FDO and contextual instrumentation is not "
      "supported.");
  // Enable contextual profiling instrumentation.
  const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
                            PGOCtxProfLoweringPass::isContextualIRPGOEnabled();

  if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)
    addPreInlinerPasses(MPM, Level, Phase);

  // Add all the requested passes for instrumentation PGO, if requested.
  if (IsPGOInstrGen || IsPGOInstrUse) {
    addPGOInstrPasses(MPM, Level,
                      /*RunProfileGen=*/IsPGOInstrGen,
                      /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
                      PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
                      PGOOpt->FS);
  } else if (IsCtxProfGen) {
    MPM.addPass(PGOInstrumentationGen(false));
    addPostPGOLoopRotation(MPM, Level);
    MPM.addPass(PGOCtxProfLoweringPass());
  }

  if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
    MPM.addPass(PGOIndirectCallPromotion(false, false));

  if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
                                               EnableSampledInstr));

  if (IsMemprofUse)
    MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));

  // Synthesize function entry counts for non-PGO compilation.
  if (EnableSyntheticCounts && !PGOOpt)
    MPM.addPass(SyntheticCountsPropagation());

  if (EnablePGOForceFunctionAttrs && PGOOpt)
    MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));

  MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));

  // Choose between the module inliner and the CGSCC inliner.
  if (EnableModuleInliner)
    MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
  else
    MPM.addPass(buildInlinerPipeline(Level, Phase));

  // Remove any dead arguments exposed by cleanups, constant folding globals,
  // and argument promotion.
  MPM.addPass(DeadArgumentEliminationPass());

  MPM.addPass(CoroCleanupPass());

  // Optimize globals now that functions are fully simplified.
  MPM.addPass(GlobalOptPass());
  MPM.addPass(GlobalDCEPass());

  return MPM;
}
1229

1230
/// TODO: Should LTO cause any differences to this set of passes?
1231
void PassBuilder::addVectorPasses(OptimizationLevel Level,
1232
                                  FunctionPassManager &FPM, bool IsFullLTO) {
1233
  FPM.addPass(LoopVectorizePass(
1234
      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1235

1236
  if (EnableInferAlignmentPass)
1237
    FPM.addPass(InferAlignmentPass());
1238
  if (IsFullLTO) {
1239
    // The vectorizer may have significantly shortened a loop body; unroll
1240
    // again. Unroll small loops to hide loop backedge latency and saturate any
1241
    // parallel execution resources of an out-of-order processor. We also then
1242
    // need to clean up redundancies and loop invariant code.
1243
    // FIXME: It would be really good to use a loop-integrated instruction
1244
    // combiner for cleanup here so that the unrolling and LICM can be pipelined
1245
    // across the loop nests.
1246
    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1247
    if (EnableUnrollAndJam && PTO.LoopUnrolling)
1248
      FPM.addPass(createFunctionToLoopPassAdaptor(
1249
          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1250
    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1251
        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1252
        PTO.ForgetAllSCEVInLoopUnroll)));
1253
    FPM.addPass(WarnMissedTransformationsPass());
1254
    // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1255
    // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1256
    // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1257
    // NOTE: we are very late in the pipeline, and we don't have any LICM
1258
    // or SimplifyCFG passes scheduled after us, that would cleanup
1259
    // the CFG mess this may created if allowed to modify CFG, so forbid that.
1260
    FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1261
  }
1262

1263
  if (!IsFullLTO) {
1264
    // Eliminate loads by forwarding stores from the previous iteration to loads
1265
    // of the current iteration.
1266
    FPM.addPass(LoopLoadEliminationPass());
1267
  }
1268
  // Cleanup after the loop optimization passes.
1269
  FPM.addPass(InstCombinePass());
1270

1271
  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1272
    ExtraVectorPassManager ExtraPasses;
1273
    // At higher optimization levels, try to clean up any runtime overlap and
1274
    // alignment checks inserted by the vectorizer. We want to track correlated
1275
    // runtime checks for two inner loops in the same outer loop, fold any
1276
    // common computations, hoist loop-invariant aspects out of any outer loop,
1277
    // and unswitch the runtime checks if possible. Once hoisted, we may have
1278
    // dead (or speculatable) control flows or more combining opportunities.
1279
    ExtraPasses.addPass(EarlyCSEPass());
1280
    ExtraPasses.addPass(CorrelatedValuePropagationPass());
1281
    ExtraPasses.addPass(InstCombinePass());
1282
    LoopPassManager LPM;
1283
    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1284
                         /*AllowSpeculation=*/true));
1285
    LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1286
                                       OptimizationLevel::O3));
1287
    ExtraPasses.addPass(
1288
        createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1289
                                        /*UseBlockFrequencyInfo=*/true));
1290
    ExtraPasses.addPass(
1291
        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1292
    ExtraPasses.addPass(InstCombinePass());
1293
    FPM.addPass(std::move(ExtraPasses));
1294
  }
1295

1296
  // Now that we've formed fast to execute loop structures, we do further
1297
  // optimizations. These are run afterward as they might block doing complex
1298
  // analyses and transforms such as what are needed for loop vectorization.
1299

1300
  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1301
  // GVN, loop transforms, and others have already run, so it's now better to
1302
  // convert to more optimized IR using more aggressive simplify CFG options.
1303
  // The extra sinking transform can create larger basic blocks, so do this
1304
  // before SLP vectorization.
1305
  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1306
                                  .forwardSwitchCondToPhi(true)
1307
                                  .convertSwitchRangeToICmp(true)
1308
                                  .convertSwitchToLookupTable(true)
1309
                                  .needCanonicalLoops(false)
1310
                                  .hoistCommonInsts(true)
1311
                                  .sinkCommonInsts(true)));
1312

1313
  if (IsFullLTO) {
1314
    FPM.addPass(SCCPPass());
1315
    FPM.addPass(InstCombinePass());
1316
    FPM.addPass(BDCEPass());
1317
  }
1318

1319
  // Optimize parallel scalar instruction chains into SIMD instructions.
1320
  if (PTO.SLPVectorization) {
1321
    FPM.addPass(SLPVectorizerPass());
1322
    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1323
      FPM.addPass(EarlyCSEPass());
1324
    }
1325
  }
1326
  // Enhance/cleanup vector code.
1327
  FPM.addPass(VectorCombinePass());
1328

1329
  if (!IsFullLTO) {
1330
    FPM.addPass(InstCombinePass());
1331
    // Unroll small loops to hide loop backedge latency and saturate any
1332
    // parallel execution resources of an out-of-order processor. We also then
1333
    // need to clean up redundancies and loop invariant code.
1334
    // FIXME: It would be really good to use a loop-integrated instruction
1335
    // combiner for cleanup here so that the unrolling and LICM can be pipelined
1336
    // across the loop nests.
1337
    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1338
    if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1339
      FPM.addPass(createFunctionToLoopPassAdaptor(
1340
          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1341
    }
1342
    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1343
        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1344
        PTO.ForgetAllSCEVInLoopUnroll)));
1345
    FPM.addPass(WarnMissedTransformationsPass());
1346
    // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1347
    // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1348
    // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1349
    // NOTE: we are very late in the pipeline, and we don't have any LICM
1350
    // or SimplifyCFG passes scheduled after us, that would cleanup
1351
    // the CFG mess this may created if allowed to modify CFG, so forbid that.
1352
    FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1353
  }
1354

1355
  if (EnableInferAlignmentPass)
1356
    FPM.addPass(InferAlignmentPass());
1357
  FPM.addPass(InstCombinePass());
1358

1359
  // This is needed for two reasons:
1360
  //   1. It works around problems that instcombine introduces, such as sinking
1361
  //      expensive FP divides into loops containing multiplications using the
1362
  //      divide result.
1363
  //   2. It helps to clean up some loop-invariant code created by the loop
1364
  //      unroll pass when IsFullLTO=false.
1365
  FPM.addPass(createFunctionToLoopPassAdaptor(
1366
      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1367
               /*AllowSpeculation=*/true),
1368
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1369

1370
  // Now that we've vectorized and unrolled loops, we may have more refined
1371
  // alignment information, try to re-derive it here.
1372
  FPM.addPass(AlignmentFromAssumptionsPass());
1373
}
1374

1375
ModulePassManager
1376
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1377
                                             ThinOrFullLTOPhase LTOPhase) {
1378
  const bool LTOPreLink = isLTOPreLink(LTOPhase);
1379
  ModulePassManager MPM;
1380

1381
  // Run partial inlining pass to partially inline functions that have
1382
  // large bodies.
1383
  if (RunPartialInlining)
1384
    MPM.addPass(PartialInlinerPass());
1385

1386
  // Remove avail extern fns and globals definitions since we aren't compiling
1387
  // an object file for later LTO. For LTO we want to preserve these so they
1388
  // are eligible for inlining at link-time. Note if they are unreferenced they
1389
  // will be removed by GlobalDCE later, so this only impacts referenced
1390
  // available externally globals. Eventually they will be suppressed during
1391
  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1392
  // may make globals referenced by available external functions dead and saves
1393
  // running remaining passes on the eliminated functions. These should be
1394
  // preserved during prelinking for link-time inlining decisions.
1395
  if (!LTOPreLink)
1396
    MPM.addPass(EliminateAvailableExternallyPass());
1397

1398
  if (EnableOrderFileInstrumentation)
1399
    MPM.addPass(InstrOrderFilePass());
1400

1401
  // Do RPO function attribute inference across the module to forward-propagate
1402
  // attributes where applicable.
1403
  // FIXME: Is this really an optimization rather than a canonicalization?
1404
  MPM.addPass(ReversePostOrderFunctionAttrsPass());
1405

1406
  // Do a post inline PGO instrumentation and use pass. This is a context
1407
  // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1408
  // cross-module inline has not been done yet. The context sensitive
1409
  // instrumentation is after all the inlines are done.
1410
  if (!LTOPreLink && PGOOpt) {
1411
    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1412
      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1413
                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1414
                        PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1415
                        PGOOpt->FS);
1416
    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1417
      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1418
                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1419
                        PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1420
                        PGOOpt->FS);
1421
  }
1422

1423
  // Re-compute GlobalsAA here prior to function passes. This is particularly
1424
  // useful as the above will have inlined, DCE'ed, and function-attr
1425
  // propagated everything. We should at this point have a reasonably minimal
1426
  // and richly annotated call graph. By computing aliasing and mod/ref
1427
  // information for all local globals here, the late loop passes and notably
1428
  // the vectorizer will be able to use them to help recognize vectorizable
1429
  // memory operations.
1430
  if (EnableGlobalAnalyses)
1431
    MPM.addPass(RecomputeGlobalsAAPass());
1432

1433
  invokeOptimizerEarlyEPCallbacks(MPM, Level);
1434

1435
  FunctionPassManager OptimizePM;
1436
  // Scheduling LoopVersioningLICM when inlining is over, because after that
1437
  // we may see more accurate aliasing. Reason to run this late is that too
1438
  // early versioning may prevent further inlining due to increase of code
1439
  // size. Other optimizations which runs later might get benefit of no-alias
1440
  // assumption in clone loop.
1441
  if (UseLoopVersioningLICM) {
1442
    OptimizePM.addPass(
1443
        createFunctionToLoopPassAdaptor(LoopVersioningLICMPass()));
1444
    // LoopVersioningLICM pass might increase new LICM opportunities.
1445
    OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1446
        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1447
                 /*AllowSpeculation=*/true),
1448
        /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1449
  }
1450

1451
  OptimizePM.addPass(Float2IntPass());
1452
  OptimizePM.addPass(LowerConstantIntrinsicsPass());
1453

1454
  if (EnableMatrix) {
1455
    OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1456
    OptimizePM.addPass(EarlyCSEPass());
1457
  }
1458

1459
  // CHR pass should only be applied with the profile information.
1460
  // The check is to check the profile summary information in CHR.
1461
  if (EnableCHR && Level == OptimizationLevel::O3)
1462
    OptimizePM.addPass(ControlHeightReductionPass());
1463

1464
  // FIXME: We need to run some loop optimizations to re-rotate loops after
1465
  // simplifycfg and others undo their rotation.
1466

1467
  // Optimize the loop execution. These passes operate on entire loop nests
1468
  // rather than on each loop in an inside-out manner, and so they are actually
1469
  // function passes.
1470

1471
  invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1472

1473
  LoopPassManager LPM;
1474
  // First rotate loops that may have been un-rotated by prior passes.
1475
  // Disable header duplication at -Oz.
1476
  LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
1477
                                 Level != OptimizationLevel::Oz,
1478
                             LTOPreLink));
1479
  // Some loops may have become dead by now. Try to delete them.
1480
  // FIXME: see discussion in https://reviews.llvm.org/D112851,
1481
  //        this may need to be revisited once we run GVN before loop deletion
1482
  //        in the simplification pipeline.
1483
  LPM.addPass(LoopDeletionPass());
1484
  OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1485
      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1486

1487
  // Distribute loops to allow partial vectorization.  I.e. isolate dependences
1488
  // into separate loop that would otherwise inhibit vectorization.  This is
1489
  // currently only performed for loops marked with the metadata
1490
  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1491
  OptimizePM.addPass(LoopDistributePass());
1492

1493
  // Populates the VFABI attribute with the scalar-to-vector mappings
1494
  // from the TargetLibraryInfo.
1495
  OptimizePM.addPass(InjectTLIMappings());
1496

1497
  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1498

1499
  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1500
  // canonicalization pass that enables other optimizations. As a result,
1501
  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1502
  // result too early.
1503
  OptimizePM.addPass(LoopSinkPass());
1504

1505
  // And finally clean up LCSSA form before generating code.
1506
  OptimizePM.addPass(InstSimplifyPass());
1507

1508
  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1509
  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1510
  // flattening of blocks.
1511
  OptimizePM.addPass(DivRemPairsPass());
1512

1513
  // Try to annotate calls that were created during optimization.
1514
  OptimizePM.addPass(TailCallElimPass());
1515

1516
  // LoopSink (and other loop passes since the last simplifyCFG) might have
1517
  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1518
  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1519
                                         .convertSwitchRangeToICmp(true)
1520
                                         .speculateUnpredictables(true)));
1521

1522
  // Add the core optimizing pipeline.
1523
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1524
                                                PTO.EagerlyInvalidateAnalyses));
1525

1526
  invokeOptimizerLastEPCallbacks(MPM, Level);
1527

1528
  // Split out cold code. Splitting is done late to avoid hiding context from
1529
  // other optimizations and inadvertently regressing performance. The tradeoff
1530
  // is that this has a higher code size cost than splitting early.
1531
  if (EnableHotColdSplit && !LTOPreLink)
1532
    MPM.addPass(HotColdSplittingPass());
1533

1534
  // Search the code for similar regions of code. If enough similar regions can
1535
  // be found where extracting the regions into their own function will decrease
1536
  // the size of the program, we extract the regions, a deduplicate the
1537
  // structurally similar regions.
1538
  if (EnableIROutliner)
1539
    MPM.addPass(IROutlinerPass());
1540

1541
  // Now we need to do some global optimization transforms.
1542
  // FIXME: It would seem like these should come first in the optimization
1543
  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1544
  // ordering here.
1545
  MPM.addPass(GlobalDCEPass());
1546
  MPM.addPass(ConstantMergePass());
1547

1548
  // Merge functions if requested. It has a better chance to merge functions
1549
  // after ConstantMerge folded jump tables.
1550
  if (PTO.MergeFunctions)
1551
    MPM.addPass(MergeFunctionsPass());
1552

1553
  if (PTO.CallGraphProfile && !LTOPreLink)
1554
    MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1555
                              LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
1556

1557
  // TODO: Relative look table converter pass caused an issue when full lto is
1558
  // enabled. See https://reviews.llvm.org/D94355 for more details.
1559
  // Until the issue fixed, disable this pass during pre-linking phase.
1560
  if (!LTOPreLink)
1561
    MPM.addPass(RelLookupTableConverterPass());
1562

1563
  return MPM;
1564
}
1565

1566
// Build the default per-module pipeline for \p Level.
//
// At O0 this defers entirely to buildO0DefaultPipeline. Otherwise it runs the
// simplification pipeline followed by the optimization pipeline; when
// \p LTOPreLink is set both are built for ThinOrFullLTOPhase::FullLTOPreLink
// and the required LTO pre-link passes are appended at the end.
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                           bool LTOPreLink) {
  if (Level == OptimizationLevel::O0)
    return buildO0DefaultPipeline(Level, LTOPreLink);

  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Apply module pipeline start EP callback.
  invokePipelineStartEPCallbacks(MPM, Level);

  const ThinOrFullLTOPhase LTOPhase = LTOPreLink
                                          ? ThinOrFullLTOPhase::FullLTOPreLink
                                          : ThinOrFullLTOPhase::None;
  // Add the core simplification pipeline.
  MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));

  // Now add the optimization pipeline.
  MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));

  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(PseudoProbeUpdatePass());

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(MPM);
  return MPM;
}
1606

1607
// Build the FatLTO pipeline for \p Level.
//
// The pipeline first runs the ThinLTO or full-LTO pre-link pipeline (selected
// by \p ThinLTO), then EmbedBitcodePass(ThinLTO, EmitSummary), and finally
// either the ThinLTO default pipeline (ThinLTO with sample-profile use) or
// the plain module optimization pipeline plus annotation remarks.
ModulePassManager
PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
                                        bool EmitSummary) {
  ModulePassManager MPM;
  if (ThinLTO)
    MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
  else
    MPM.addPass(buildLTOPreLinkDefaultPipeline(Level));
  MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));

  // Use the ThinLTO post-link pipeline with sample profiling
  if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
  else {
    // otherwise, just use module optimization
    MPM.addPass(
        buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);
  }
  return MPM;
}
1629

1630
// Build the ThinLTO pre-link pipeline for \p Level.
//
// At O0 this defers to buildO0DefaultPipeline with LTOPreLink set. Otherwise
// only the simplification pipeline is run (for
// ThinOrFullLTOPhase::ThinLTOPreLink), followed by optional partial inlining,
// pseudo-probe update, the optimizer early/last EP callbacks, annotation
// remarks and the required LTO pre-link passes.
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  if (Level == OptimizationLevel::O0)
    return buildO0DefaultPipeline(Level, /*LTOPreLink=*/true);

  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Apply module pipeline start EP callback.
  invokePipelineStartEPCallbacks(MPM, Level);

  // If we are planning to perform ThinLTO later, we don't bloat the code with
  // unrolling/vectorization/... now. Just simplify the module as much as we
  // can.
  MPM.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPreLink));

  // Run partial inlining pass to partially inline functions that have
  // large bodies.
  // FIXME: It isn't clear whether this is really the right place to run this
  // in ThinLTO. Because there is another canonicalization and simplification
  // phase that will run after the thin link, running this here ends up with
  // less information than will be available later and it may grow functions in
  // ways that aren't beneficial.
  if (RunPartialInlining)
    MPM.addPass(PartialInlinerPass());

  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(PseudoProbeUpdatePass());

  // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
  // optimization is going to be done in PostLink stage, but clang can't add
  // callbacks there in case of in-process ThinLTO called by linker.
  invokeOptimizerEarlyEPCallbacks(MPM, Level);
  invokeOptimizerLastEPCallbacks(MPM, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  addRequiredLTOPreLinkPasses(MPM);

  return MPM;
}
1682

1683
// Build the ThinLTO post-link pipeline for \p Level.
//
// When \p ImportSummary is non-null, the summary-driven passes (MemProf
// context disambiguation if enabled, WholeProgramDevirt and LowerTypeTests)
// are scheduled first. At O0 only type-test cleanup,
// available_externally elimination and GlobalDCE follow; otherwise the full
// simplification and optimization pipelines are run for
// ThinOrFullLTOPhase::ThinLTOPostLink.
ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
    OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
  ModulePassManager MPM;

  if (ImportSummary) {
    // For ThinLTO we must apply the context disambiguation decisions early, to
    // ensure we can correctly match the callsites to summary data.
    if (EnableMemProfContextDisambiguation)
      MPM.addPass(MemProfContextDisambiguation(ImportSummary));

    // These passes import type identifier resolutions for whole-program
    // devirtualization and CFI. They must run early because other passes may
    // disturb the specific instruction patterns that these passes look for,
    // creating dependencies on resolutions that may not appear in the summary.
    //
    // For example, GVN may transform the pattern assume(type.test) appearing in
    // two basic blocks into assume(phi(type.test, type.test)), which would
    // transform a dependency on a WPD resolution into a dependency on a type
    // identifier resolution for CFI.
    //
    // Also, WPD has access to more precise information than ICP and can
    // devirtualize more effectively, so it should operate on the IR first.
    //
    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
  }

  if (Level == OptimizationLevel::O0) {
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP.
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
    // Drop available_externally and unreferenced globals. This is necessary
    // with ThinLTO in order to avoid leaving undefined references to dead
    // globals in the object file.
    MPM.addPass(EliminateAvailableExternallyPass());
    MPM.addPass(GlobalDCEPass());
    return MPM;
  }

  // Add the core simplification pipeline.
  MPM.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPostLink));

  // Now add the optimization pipeline.
  MPM.addPass(buildModuleOptimizationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPostLink));

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  return MPM;
}
1737

1738
// Build the full-LTO pre-link pipeline for \p Level. This currently just
// forwards to buildPerModuleDefaultPipeline with LTOPreLink enabled.
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  // FIXME: We should use a customized pre-link pipeline!
  return buildPerModuleDefaultPipeline(Level,
                                       /* LTOPreLink */ true);
}
1744

1745
// Build the full-LTO link-time pipeline for \p Level.
//
// \p ExportSummary is forwarded to WholeProgramDevirtPass and
// LowerTypeTestsPass. The function returns early at O0 (after the type-test
// lowering passes) and at O1 (after attribute inference, global splitting and
// whole-program devirtualization); higher levels continue with the IPO,
// scalar, loop and vectorization passes below. Every exit path invokes the
// full-LTO "last" EP callbacks and emits annotation remarks.
ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                                     ModuleSummaryIndex *ExportSummary) {
  ModulePassManager MPM;

  invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);

  // Create a function that performs CFI checks for cross-DSO calls with targets
  // in the current module.
  MPM.addPass(CrossDSOCFIPass());

  if (Level == OptimizationLevel::O0) {
    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP.
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

    invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);

    return MPM;
  }

  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
    // Load sample profile before running the LTO optimization pipeline.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile,
                                        ThinOrFullLTOPhase::FullLTOPostLink));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  }

  // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
  MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));

  // Remove unused virtual tables to improve the quality of code generated by
  // whole-program devirtualization and bitset lowering.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  // Do basic inference of function attributes from known properties of system
  // libraries and other oracles.
  MPM.addPass(InferFunctionAttrsPass());

  if (Level.getSpeedupLevel() > 1) {
    MPM.addPass(createModuleToFunctionPassAdaptor(
        CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));

    // Indirect call promotion. This should promote all the targets that are
    // left by the earlier promotion pass that promotes intra-module targets.
    // This two-step promotion is to save the compile time. For LTO, it should
    // produce the same result as if we only do promotion here.
    MPM.addPass(PGOIndirectCallPromotion(
        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));

    // Propagate constants at call sites into the functions they call.  This
    // opens opportunities for globalopt (and inlining) by substituting function
    // pointers passed as arguments to direct uses of functions.
    MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
                                         Level != OptimizationLevel::Os &&
                                         Level != OptimizationLevel::Oz)));

    // Attach metadata to indirect call sites indicating the set of functions
    // they may target at run-time. This should follow IPSCCP.
    MPM.addPass(CalledValuePropagationPass());
  }

  // Now deduce any function attributes based on the current code.
  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Do RPO function attribute inference across the module to forward-propagate
  // attributes where applicable.
  // FIXME: Is this really an optimization rather than a canonicalization?
  MPM.addPass(ReversePostOrderFunctionAttrsPass());

  // Use in-range annotations on GEP indices to split globals where beneficial.
  MPM.addPass(GlobalSplitPass());

  // Run whole program optimization of virtual call when the list of callees
  // is fixed.
  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));

  // Stop here at -O1.
  if (Level == OptimizationLevel::O1) {
    // The LowerTypeTestsPass needs to run to lower type metadata and the
    // type.test intrinsics. The pass does nothing if CFI is disabled.
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP (which is performed earlier than this in the regular LTO
    // pipeline).
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

    invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);

    return MPM;
  }

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Promote any localized globals to SSA registers.
  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));

  // Linking modules together can lead to duplicate global constant, only
  // keep one copy of each constant.
  MPM.addPass(ConstantMergePass());

  // Remove unused arguments from functions.
  MPM.addPass(DeadArgumentEliminationPass());

  // Reduce the code after globalopt and ipsccp.  Both can open up significant
  // simplification opportunities, and both can propagate functions through
  // function pointers.  When this happens, we often have to resolve varargs
  // calls, etc, so let instcombine do this.
  FunctionPassManager PeepholeFPM;
  PeepholeFPM.addPass(InstCombinePass());
  if (Level.getSpeedupLevel() > 1)
    PeepholeFPM.addPass(AggressiveInstCombinePass());
  invokePeepholeEPCallbacks(PeepholeFPM, Level);

  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Note: historically, the PruneEH pass was run first to deduce nounwind and
  // generally clean up exception handling overhead. It isn't clear this is
  // valuable as the inliner doesn't currently care whether it is inlining an
  // invoke or a call.
  // Run the inliner now.
  if (EnableModuleInliner) {
    MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
                                  UseInlineAdvisor,
                                  ThinOrFullLTOPhase::FullLTOPostLink));
  } else {
    MPM.addPass(ModuleInlinerWrapperPass(
        getInlineParamsFromOptLevel(Level),
        /* MandatoryFirst */ true,
        InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
                      InlinePass::CGSCCInliner}));
  }

  // Perform context disambiguation after inlining, since that would reduce the
  // amount of additional cloning required to distinguish the allocation
  // contexts.
  if (EnableMemProfContextDisambiguation)
    MPM.addPass(MemProfContextDisambiguation());

  // Optimize globals again after we ran the inliner.
  MPM.addPass(GlobalOptPass());

  // Run the OpenMPOpt pass again after global optimizations.
  MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));

  // Garbage collect dead functions.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  // If we didn't decide to inline a function, check to see if we can
  // transform it to pass arguments by value instead of by reference.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));

  FunctionPassManager FPM;
  // The IPO Passes may leave cruft around. Clean up after them.
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  if (EnableConstraintElimination)
    FPM.addPass(ConstraintEliminationPass());

  FPM.addPass(JumpThreadingPass());

  // Do a post inline PGO instrumentation and use pass. This is a context
  // sensitive PGO pass.
  // Note: these are added to MPM directly, so they are scheduled before FPM,
  // which is only handed to MPM further down.
  if (PGOOpt) {
    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                        PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
                        PGOOpt->FS);
    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                        PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
                        PGOOpt->FS);
  }

  // Break up allocas
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // LTO provides additional opportunities for tailcall elimination due to
  // link-time inlining, and visibility of nocapture attribute.
  FPM.addPass(TailCallElimPass());

  // Run a few AA driver optimizations here and now to cleanup the code.
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
                                                PTO.EagerlyInvalidateAnalyses));

  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Require the GlobalsAA analysis for the module so we can query it within
  // MainFPM.
  if (EnableGlobalAnalyses) {
    MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
    // Invalidate AAManager so it can be recreated and pick up the newly
    // available GlobalsAA.
    MPM.addPass(
        createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
  }

  FunctionPassManager MainFPM;
  MainFPM.addPass(createFunctionToLoopPassAdaptor(
      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));

  if (RunNewGVN)
    MainFPM.addPass(NewGVNPass());
  else
    MainFPM.addPass(GVNPass());

  // Remove dead memcpy()'s.
  MainFPM.addPass(MemCpyOptPass());

  // Nuke dead stores.
  MainFPM.addPass(DSEPass());
  MainFPM.addPass(MoveAutoInitPass());
  MainFPM.addPass(MergedLoadStoreMotionPass());

  LoopPassManager LPM;
  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
    LPM.addPass(LoopFlattenPass());
  LPM.addPass(IndVarSimplifyPass());
  LPM.addPass(LoopDeletionPass());
  // FIXME: Add loop interchange.

  // Unroll small loops and perform peeling.
  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                 PTO.ForgetAllSCEVInLoopUnroll));
  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
  // *All* loop passes must preserve it, in order to be able to use it.
  MainFPM.addPass(createFunctionToLoopPassAdaptor(
      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));

  MainFPM.addPass(LoopDistributePass());

  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);

  // Run the OpenMPOpt CGSCC pass again late.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
      OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));

  invokePeepholeEPCallbacks(MainFPM, Level);
  MainFPM.addPass(JumpThreadingPass());
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Lower type metadata and the type.test intrinsic. This pass supports
  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
  // to be run at link time if CFI is enabled. This pass does nothing if
  // CFI is disabled.
  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
  // Run a second time to clean up any type tests left behind by WPD for use
  // in ICP (which is performed earlier than this in the regular LTO pipeline).
  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

  // Enable splitting late in the FullLTO post-link pipeline.
  if (EnableHotColdSplit)
    MPM.addPass(HotColdSplittingPass());

  // Add late LTO optimization passes.
  FunctionPassManager LateFPM;

  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
  // canonicalization pass that enables other optimizations. As a result,
  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
  // result too early.
  LateFPM.addPass(LoopSinkPass());

  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
  // flattening of blocks.
  LateFPM.addPass(DivRemPairsPass());

  // Delete basic blocks, which optimization passes may have killed.
  LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                      .convertSwitchRangeToICmp(true)
                                      .hoistCommonInsts(true)
                                      .speculateUnpredictables(true)));
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));

  // Drop bodies of available externally objects to improve GlobalDCE.
  MPM.addPass(EliminateAvailableExternallyPass());

  // Now that we have optimized the program, discard unreachable functions.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (PTO.CallGraphProfile)
    MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));

  invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  return MPM;
}
2063

2064
/// Build the module pass pipeline used when optimization is disabled (O0).
///
/// The pipeline is deliberately small: optional profiling instrumentation,
/// function entry/exit instrumentation, the always-inliner, optional function
/// merging and matrix-intrinsic lowering, the coroutine lowering passes, and
/// whatever the registered extension-point callbacks contribute.
///
/// \param Level      Must be OptimizationLevel::O0; any other level trips the
///                   assertion at the top of the function.
/// \param LTOPreLink When true, addRequiredLTOPreLinkPasses() is applied to
///                   the pipeline just before the annotation remarks pass.
/// \returns The populated module pass manager.
ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
                                                      bool LTOPreLink) {
  assert(Level == OptimizationLevel::O0 &&
         "buildO0DefaultPipeline should only be used with O0");

  ModulePassManager MPM;

  // Perform pseudo probe instrumentation in O0 mode. This is for the
  // consistency between different build modes. For example, a LTO build can be
  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
  // the postlink will require pseudo probe instrumentation in the prelink.
  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
    MPM.addPass(SampleProfileProbePass(TM));

  // IR-level PGO is honoured at O0 as well: generate instrumentation for
  // IRInstr, consume a profile for IRUse. Context-sensitive PGO is not
  // requested here (IsCS is always false).
  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
                 PGOOpt->Action == PGOOptions::IRUse))
    addPGOInstrPassesForO0(
        MPM,
        /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
        /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
        PGOOpt->ProfileRemappingFile, PGOOpt->FS);

  // Instrument function entry and exit before all inlining.
  MPM.addPass(createModuleToFunctionPassAdaptor(
      EntryExitInstrumenterPass(/*PostInlining=*/false)));

  invokePipelineStartEPCallbacks(MPM, Level);

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  invokePipelineEarlySimplificationEPCallbacks(MPM, Level);

  // Build a minimal pipeline based on the semantics required by LLVM,
  // which is just that always inlining occurs. Further, disable generating
  // lifetime intrinsics to avoid enabling further optimizations during
  // code generation.
  MPM.addPass(AlwaysInlinerPass(
      /*InsertLifetimeIntrinsics=*/false));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (EnableMatrix)
    MPM.addPass(
        createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));

  // The extension points below take CGSCC, loop or function pass managers,
  // none of which this pipeline builds on its own. For each point that has
  // callbacks registered, collect their passes into a fresh manager of the
  // right kind and splice it into the module pipeline only if the callbacks
  // actually added something.
  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
    CGSCCPassManager CGPM;
    invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
    if (!CGPM.isEmpty())
      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  }
  if (!LateLoopOptimizationsEPCallbacks.empty()) {
    LoopPassManager LPM;
    invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
    if (!LPM.isEmpty()) {
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(LPM))));
    }
  }
  if (!LoopOptimizerEndEPCallbacks.empty()) {
    LoopPassManager LPM;
    invokeLoopOptimizerEndEPCallbacks(LPM, Level);
    if (!LPM.isEmpty()) {
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(LPM))));
    }
  }
  if (!ScalarOptimizerLateEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeScalarOptimizerLateEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  invokeOptimizerEarlyEPCallbacks(MPM, Level);

  if (!VectorizerStartEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeVectorizerStartEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  // Coroutine lowering is scheduled unconditionally, as its own nested module
  // pipeline handed to CoroConditionalWrapper.
  // NOTE(review): presumably the wrapper skips the nested pipeline for modules
  // that contain no coroutines -- confirm against CoroConditionalWrapper.
  ModulePassManager CoroPM;
  CoroPM.addPass(CoroEarlyPass());
  CGSCCPassManager CGPM;
  CGPM.addPass(CoroSplitPass());
  CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  CoroPM.addPass(CoroCleanupPass());
  CoroPM.addPass(GlobalDCEPass());
  MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));

  invokeOptimizerLastEPCallbacks(MPM, Level);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(MPM);

  // Emit annotation remarks as the final step of the pipeline.
  MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));

  return MPM;
}
2167

2168
/// Build the default alias-analysis manager.
///
/// Registers, in this order: BasicAA, ScopedNoAliasAA, TypeBasedAA, GlobalsAA
/// (only when EnableGlobalAnalyses is set), and finally any analyses the
/// target machine registers (only when a target machine is available).
///
/// \returns The configured AAManager.
AAManager PassBuilder::buildDefaultAAPipeline() {
  AAManager AA;

  // The order in which these are registered determines their priority when
  // being queried.

  // First we register the basic alias analysis that provides the majority of
  // per-function local AA logic. This is a stateless, on-demand local set of
  // AA techniques.
  AA.registerFunctionAnalysis<BasicAA>();

  // Next we query fast, specialized alias analyses that wrap IR-embedded
  // information about aliasing.
  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
  AA.registerFunctionAnalysis<TypeBasedAA>();

  // Add support for querying global aliasing information when available.
  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
  // analysis, all that the `AAManager` can do is query for any *cached*
  // results from `GlobalsAA` through a readonly proxy.
  if (EnableGlobalAnalyses)
    AA.registerModuleAnalysis<GlobalsAA>();

  // Add target-specific alias analyses. TM may be null, in which case there
  // is nothing target-specific to register.
  if (TM)
    TM->registerDefaultAliasAnalyses(AA);

  return AA;
}
2197

2198
Product

Resources

Company