Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
35262 views
1
//===- Construction of pass pipelines -------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
/// \file
9
///
10
/// This file provides the implementation of the PassBuilder based on our
11
/// static pass registry as well as related functionality. It also provides
12
/// helpers to aid in analyzing, debugging, and testing passes and pass
13
/// pipelines.
14
///
15
//===----------------------------------------------------------------------===//
16
17
#include "llvm/ADT/Statistic.h"
18
#include "llvm/Analysis/AliasAnalysis.h"
19
#include "llvm/Analysis/BasicAliasAnalysis.h"
20
#include "llvm/Analysis/CGSCCPassManager.h"
21
#include "llvm/Analysis/GlobalsModRef.h"
22
#include "llvm/Analysis/InlineAdvisor.h"
23
#include "llvm/Analysis/ProfileSummaryInfo.h"
24
#include "llvm/Analysis/ScopedNoAliasAA.h"
25
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
26
#include "llvm/IR/PassManager.h"
27
#include "llvm/Passes/OptimizationLevel.h"
28
#include "llvm/Passes/PassBuilder.h"
29
#include "llvm/Support/CommandLine.h"
30
#include "llvm/Support/ErrorHandling.h"
31
#include "llvm/Support/PGOOptions.h"
32
#include "llvm/Support/VirtualFileSystem.h"
33
#include "llvm/Target/TargetMachine.h"
34
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
35
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
36
#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
37
#include "llvm/Transforms/Coroutines/CoroEarly.h"
38
#include "llvm/Transforms/Coroutines/CoroElide.h"
39
#include "llvm/Transforms/Coroutines/CoroSplit.h"
40
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
41
#include "llvm/Transforms/IPO/AlwaysInliner.h"
42
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
43
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
44
#include "llvm/Transforms/IPO/Attributor.h"
45
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
46
#include "llvm/Transforms/IPO/ConstantMerge.h"
47
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
48
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
49
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
50
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
51
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
52
#include "llvm/Transforms/IPO/FunctionAttrs.h"
53
#include "llvm/Transforms/IPO/GlobalDCE.h"
54
#include "llvm/Transforms/IPO/GlobalOpt.h"
55
#include "llvm/Transforms/IPO/GlobalSplit.h"
56
#include "llvm/Transforms/IPO/HotColdSplitting.h"
57
#include "llvm/Transforms/IPO/IROutliner.h"
58
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
59
#include "llvm/Transforms/IPO/Inliner.h"
60
#include "llvm/Transforms/IPO/LowerTypeTests.h"
61
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
62
#include "llvm/Transforms/IPO/MergeFunctions.h"
63
#include "llvm/Transforms/IPO/ModuleInliner.h"
64
#include "llvm/Transforms/IPO/OpenMPOpt.h"
65
#include "llvm/Transforms/IPO/PartialInlining.h"
66
#include "llvm/Transforms/IPO/SCCP.h"
67
#include "llvm/Transforms/IPO/SampleProfile.h"
68
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
69
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
70
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
71
#include "llvm/Transforms/InstCombine/InstCombine.h"
72
#include "llvm/Transforms/Instrumentation/CGProfile.h"
73
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
74
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
75
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
76
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
77
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
78
#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
79
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
80
#include "llvm/Transforms/Scalar/ADCE.h"
81
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
82
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
83
#include "llvm/Transforms/Scalar/BDCE.h"
84
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
85
#include "llvm/Transforms/Scalar/ConstraintElimination.h"
86
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
87
#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
88
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
89
#include "llvm/Transforms/Scalar/DivRemPairs.h"
90
#include "llvm/Transforms/Scalar/EarlyCSE.h"
91
#include "llvm/Transforms/Scalar/Float2Int.h"
92
#include "llvm/Transforms/Scalar/GVN.h"
93
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
94
#include "llvm/Transforms/Scalar/InferAlignment.h"
95
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
96
#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
97
#include "llvm/Transforms/Scalar/JumpThreading.h"
98
#include "llvm/Transforms/Scalar/LICM.h"
99
#include "llvm/Transforms/Scalar/LoopDeletion.h"
100
#include "llvm/Transforms/Scalar/LoopDistribute.h"
101
#include "llvm/Transforms/Scalar/LoopFlatten.h"
102
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
103
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
104
#include "llvm/Transforms/Scalar/LoopInterchange.h"
105
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
106
#include "llvm/Transforms/Scalar/LoopPassManager.h"
107
#include "llvm/Transforms/Scalar/LoopRotation.h"
108
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
109
#include "llvm/Transforms/Scalar/LoopSink.h"
110
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
111
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
112
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
113
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
114
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
115
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
116
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
117
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
118
#include "llvm/Transforms/Scalar/NewGVN.h"
119
#include "llvm/Transforms/Scalar/Reassociate.h"
120
#include "llvm/Transforms/Scalar/SCCP.h"
121
#include "llvm/Transforms/Scalar/SROA.h"
122
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
123
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
124
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
125
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
126
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
127
#include "llvm/Transforms/Utils/AddDiscriminators.h"
128
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
129
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
130
#include "llvm/Transforms/Utils/CountVisits.h"
131
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
132
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
133
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
134
#include "llvm/Transforms/Utils/Mem2Reg.h"
135
#include "llvm/Transforms/Utils/MoveAutoInit.h"
136
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
137
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
138
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
139
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
140
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
141
#include "llvm/Transforms/Vectorize/VectorCombine.h"
142
143
using namespace llvm;
144
145
/// -enable-ml-inliner: selects the inline advisor mode (initially Default).
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
    "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
    cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
    cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
                          "Heuristics-based inliner version"),
               clEnumValN(InliningAdvisorMode::Development, "development",
                          "Use development mode (runtime-loadable model)"),
               clEnumValN(InliningAdvisorMode::Release, "release",
                          "Use release mode (AOT-compiled model)")));

/// -enable-npm-synthetic-counts: synthetic function entry count generation.
static cl::opt<bool> EnableSyntheticCounts(
    "enable-npm-synthetic-counts", cl::Hidden,
    cl::desc("Run synthetic function entry count generation "
             "pass"));

/// Flag to enable inline deferral during PGO (initially on).
static cl::opt<bool>
    EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
                            cl::Hidden,
                            cl::desc("Enable inline deferral during PGO"));

/// -enable-module-inliner (initially off).
static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
                                         cl::init(false), cl::Hidden,
                                         cl::desc("Enable module inliner"));

/// -mandatory-inlining-first (initially off).
static cl::opt<bool> PerformMandatoryInliningsFirst(
    "mandatory-inlining-first", cl::init(false), cl::Hidden,
    cl::desc("Perform mandatory inlinings module-wide, before performing "
             "inlining"));

/// -eagerly-invalidate-analyses (initially on). Seeds
/// PipelineTuningOptions::EagerlyInvalidateAnalyses.
static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
    "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
    cl::desc("Eagerly invalidate more analyses in default pipelines"));

/// -enable-merge-functions (initially off). Seeds
/// PipelineTuningOptions::MergeFunctions.
static cl::opt<bool> EnableMergeFunctions(
    "enable-merge-functions", cl::init(false), cl::Hidden,
    cl::desc("Enable function merging as part of the optimization pipeline"));

/// -enable-post-pgo-loop-rotation (initially on).
static cl::opt<bool> EnablePostPGOLoopRotation(
    "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
    cl::desc("Run the loop rotation transformation after PGO instrumentation"));

/// -enable-global-analyses (initially on).
static cl::opt<bool> EnableGlobalAnalyses(
    "enable-global-analyses", cl::init(true), cl::Hidden,
    cl::desc("Enable inter-procedural analyses"));

/// -enable-partial-inlining (initially off).
static cl::opt<bool>
    RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
                       cl::desc("Run Partial inlinining pass"));

/// -extra-vectorizer-passes (initially off).
static cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization"));

/// -enable-newgvn (initially off). When set, the function simplification
/// pipeline adds NewGVNPass instead of GVNPass.
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                               cl::desc("Run the NewGVN pass"));

/// -enable-loopinterchange (initially off).
static cl::opt<bool> EnableLoopInterchange(
    "enable-loopinterchange", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental LoopInterchange Pass"));

/// -enable-unroll-and-jam (initially off).
static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
                                        cl::init(false), cl::Hidden,
                                        cl::desc("Enable Unroll And Jam Pass"));

/// -enable-loop-flatten (initially off).
static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                       cl::Hidden,
                                       cl::desc("Enable the LoopFlatten Pass"));

// Experimentally allow loop header duplication. This should allow for better
// optimization at Oz, since loop-idiom recognition can then recognize things
// like memcpy. If this ends up being useful for many targets, we should drop
// this flag and make a code generation option that can be controlled
// independent of the opt level and exposed through the frontend.
static cl::opt<bool> EnableLoopHeaderDuplication(
    "enable-loop-header-duplication", cl::init(false), cl::Hidden,
    cl::desc("Enable loop header duplication at any optimization level"));

/// -enable-dfa-jump-thread (initially off).
static cl::opt<bool>
    EnableDFAJumpThreading("enable-dfa-jump-thread",
                           cl::desc("Enable DFA jump threading"),
                           cl::init(false), cl::Hidden);

// TODO: turn on and remove flag
static cl::opt<bool> EnablePGOForceFunctionAttrs(
    "enable-pgo-force-function-attrs",
    cl::desc("Enable pass to set function attributes based on PGO profiles"),
    cl::init(false));

/// -hot-cold-split: hot-cold splitting pass.
static cl::opt<bool>
    EnableHotColdSplit("hot-cold-split",
                       cl::desc("Enable hot-cold splitting pass"));

/// -ir-outliner (initially off).
static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
                                      cl::Hidden,
                                      cl::desc("Enable ir outliner pass"));

/// -disable-preinline (initially off). When set, addPreInlinerPasses returns
/// without adding anything.
static cl::opt<bool>
    DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
                      cl::desc("Disable pre-instrumentation inliner"));

/// -preinline-threshold (initially 75). Used as the default inline threshold
/// of the pre-instrumentation inliner.
static cl::opt<int> PreInlineThreshold(
    "preinline-threshold", cl::Hidden, cl::init(75),
    cl::desc("Control the amount of inlining in pre-instrumentation inliner "
             "(default = 75)"));

/// -enable-gvn-hoist: GVN hoisting pass.
static cl::opt<bool>
    EnableGVNHoist("enable-gvn-hoist",
                   cl::desc("Enable the GVN hoisting pass (default = off)"));

/// -enable-gvn-sink: GVN sinking pass.
static cl::opt<bool>
    EnableGVNSink("enable-gvn-sink",
                  cl::desc("Enable the GVN sinking pass (default = off)"));

/// -enable-jump-table-to-switch: JumpTableToSwitch pass.
static cl::opt<bool> EnableJumpTableToSwitch(
    "enable-jump-table-to-switch",
    cl::desc("Enable JumpTableToSwitch pass (default = off)"));

// This option is used in simplifying testing SampleFDO optimizations for
// profile loading.
// NOTE(review): this remark reads as if it describes -flattened-profile-used
// below rather than -enable-chr; confirm and move it if so.
static cl::opt<bool>
    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
              cl::desc("Enable control height reduction optimization (CHR)"));

/// -flattened-profile-used (initially off).
static cl::opt<bool> FlattenedProfileUsed(
    "flattened-profile-used", cl::init(false), cl::Hidden,
    cl::desc("Indicate the sample profile being used is flattened, i.e., "
             "no inline hierachy exists in the profile"));

/// -enable-order-file-instrumentation (initially off).
static cl::opt<bool> EnableOrderFileInstrumentation(
    "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable order file instrumentation (default = off)"));

/// -enable-matrix (initially off).
static cl::opt<bool>
    EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
                 cl::desc("Enable lowering of the matrix intrinsics"));

/// -enable-constraint-elimination (initially on).
static cl::opt<bool> EnableConstraintElimination(
    "enable-constraint-elimination", cl::init(true), cl::Hidden,
    cl::desc(
        "Enable pass to eliminate conditions based on linear constraints"));

/// -attributor-enable: selects which attributor runs are enabled (initially
/// NONE).
static cl::opt<AttributorRunOption> AttributorRun(
    "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
    cl::desc("Enable the attributor inter-procedural deduction pass"),
    cl::values(clEnumValN(AttributorRunOption::ALL, "all",
                          "enable all attributor runs"),
               clEnumValN(AttributorRunOption::MODULE, "module",
                          "enable module-wide attributor runs"),
               clEnumValN(AttributorRunOption::CGSCC, "cgscc",
                          "enable call graph SCC attributor runs"),
               clEnumValN(AttributorRunOption::NONE, "none",
                          "disable attributor runs")));

/// -enable-sampled-instrumentation (initially off).
static cl::opt<bool> EnableSampledInstr(
    "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable profile instrumentation sampling (default = off)"));

/// -enable-loop-versioning-licm (initially off).
static cl::opt<bool> UseLoopVersioningLICM(
    "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental Loop Versioning LICM pass"));

// Flags that are defined in other translation units and only declared here.
namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;

extern cl::opt<bool> EnableInferAlignmentPass;
} // namespace llvm
311
312
/// Populates every tuning knob with its initial value: some are fixed
/// constants, the rest are seeded from command-line flags.
PipelineTuningOptions::PipelineTuningOptions() {
  // Knobs with a fixed initial value.
  LoopInterleaving = true;
  LoopVectorization = true;
  LoopUnrolling = true;
  CallGraphProfile = true;
  SLPVectorization = false;
  UnifiedLTO = false;
  InlinerThreshold = -1;

  // Knobs whose initial value is read from a command-line flag.
  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
  LicmMssaOptCap = SetLicmMssaOptCap;
  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
  MergeFunctions = EnableMergeFunctions;
  EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
}
326
327
namespace llvm {
328
extern cl::opt<unsigned> MaxDevirtIterations;
329
} // namespace llvm
330
331
void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
332
OptimizationLevel Level) {
333
for (auto &C : PeepholeEPCallbacks)
334
C(FPM, Level);
335
}
336
void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
337
LoopPassManager &LPM, OptimizationLevel Level) {
338
for (auto &C : LateLoopOptimizationsEPCallbacks)
339
C(LPM, Level);
340
}
341
void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
342
OptimizationLevel Level) {
343
for (auto &C : LoopOptimizerEndEPCallbacks)
344
C(LPM, Level);
345
}
346
void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
347
FunctionPassManager &FPM, OptimizationLevel Level) {
348
for (auto &C : ScalarOptimizerLateEPCallbacks)
349
C(FPM, Level);
350
}
351
void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
352
OptimizationLevel Level) {
353
for (auto &C : CGSCCOptimizerLateEPCallbacks)
354
C(CGPM, Level);
355
}
356
void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
357
OptimizationLevel Level) {
358
for (auto &C : VectorizerStartEPCallbacks)
359
C(FPM, Level);
360
}
361
void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
362
OptimizationLevel Level) {
363
for (auto &C : OptimizerEarlyEPCallbacks)
364
C(MPM, Level);
365
}
366
void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
367
OptimizationLevel Level) {
368
for (auto &C : OptimizerLastEPCallbacks)
369
C(MPM, Level);
370
}
371
void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
372
ModulePassManager &MPM, OptimizationLevel Level) {
373
for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
374
C(MPM, Level);
375
}
376
void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
377
ModulePassManager &MPM, OptimizationLevel Level) {
378
for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
379
C(MPM, Level);
380
}
381
void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
382
OptimizationLevel Level) {
383
for (auto &C : PipelineStartEPCallbacks)
384
C(MPM, Level);
385
}
386
void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
387
ModulePassManager &MPM, OptimizationLevel Level) {
388
for (auto &C : PipelineEarlySimplificationEPCallbacks)
389
C(MPM, Level);
390
}
391
392
// Helper to add AnnotationRemarksPass.
393
static void addAnnotationRemarksPass(ModulePassManager &MPM) {
394
MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
395
}
396
397
// Helper to check if the current compilation phase is preparing for LTO
398
static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
399
return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
400
Phase == ThinOrFullLTOPhase::FullLTOPreLink;
401
}
402
403
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
404
FunctionPassManager
405
PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
406
ThinOrFullLTOPhase Phase) {
407
408
FunctionPassManager FPM;
409
410
if (AreStatisticsEnabled())
411
FPM.addPass(CountVisitsPass());
412
413
// Form SSA out of local memory accesses after breaking apart aggregates into
414
// scalars.
415
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
416
417
// Catch trivial redundancies
418
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
419
420
// Hoisting of scalars and load expressions.
421
FPM.addPass(
422
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
423
FPM.addPass(InstCombinePass());
424
425
FPM.addPass(LibCallsShrinkWrapPass());
426
427
invokePeepholeEPCallbacks(FPM, Level);
428
429
FPM.addPass(
430
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
431
432
// Form canonically associated expression trees, and simplify the trees using
433
// basic mathematical properties. For example, this will form (nearly)
434
// minimal multiplication trees.
435
FPM.addPass(ReassociatePass());
436
437
// Add the primary loop simplification pipeline.
438
// FIXME: Currently this is split into two loop pass pipelines because we run
439
// some function passes in between them. These can and should be removed
440
// and/or replaced by scheduling the loop pass equivalents in the correct
441
// positions. But those equivalent passes aren't powerful enough yet.
442
// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
443
// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
444
// fully replace `SimplifyCFGPass`, and the closest to the other we have is
445
// `LoopInstSimplify`.
446
LoopPassManager LPM1, LPM2;
447
448
// Simplify the loop body. We do this initially to clean up after other loop
449
// passes run, either when iterating on a loop or on inner loops with
450
// implications on the outer loop.
451
LPM1.addPass(LoopInstSimplifyPass());
452
LPM1.addPass(LoopSimplifyCFGPass());
453
454
// Try to remove as much code from the loop header as possible,
455
// to reduce amount of IR that will have to be duplicated. However,
456
// do not perform speculative hoisting the first time as LICM
457
// will destroy metadata that may not need to be destroyed if run
458
// after loop rotation.
459
// TODO: Investigate promotion cap for O1.
460
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
461
/*AllowSpeculation=*/false));
462
463
LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
464
isLTOPreLink(Phase)));
465
// TODO: Investigate promotion cap for O1.
466
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
467
/*AllowSpeculation=*/true));
468
LPM1.addPass(SimpleLoopUnswitchPass());
469
if (EnableLoopFlatten)
470
LPM1.addPass(LoopFlattenPass());
471
472
LPM2.addPass(LoopIdiomRecognizePass());
473
LPM2.addPass(IndVarSimplifyPass());
474
475
invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
476
477
LPM2.addPass(LoopDeletionPass());
478
479
if (EnableLoopInterchange)
480
LPM2.addPass(LoopInterchangePass());
481
482
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
483
// because it changes IR to makes profile annotation in back compile
484
// inaccurate. The normal unroller doesn't pay attention to forced full unroll
485
// attributes so we need to make sure and allow the full unroll pass to pay
486
// attention to it.
487
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
488
PGOOpt->Action != PGOOptions::SampleUse)
489
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
490
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
491
PTO.ForgetAllSCEVInLoopUnroll));
492
493
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
494
495
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
496
/*UseMemorySSA=*/true,
497
/*UseBlockFrequencyInfo=*/true));
498
FPM.addPass(
499
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
500
FPM.addPass(InstCombinePass());
501
// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
502
// *All* loop passes must preserve it, in order to be able to use it.
503
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
504
/*UseMemorySSA=*/false,
505
/*UseBlockFrequencyInfo=*/false));
506
507
// Delete small array after loop unroll.
508
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
509
510
// Specially optimize memory movement as it doesn't look like dataflow in SSA.
511
FPM.addPass(MemCpyOptPass());
512
513
// Sparse conditional constant propagation.
514
// FIXME: It isn't clear why we do this *after* loop passes rather than
515
// before...
516
FPM.addPass(SCCPPass());
517
518
// Delete dead bit computations (instcombine runs after to fold away the dead
519
// computations, and then ADCE will run later to exploit any new DCE
520
// opportunities that creates).
521
FPM.addPass(BDCEPass());
522
523
// Run instcombine after redundancy and dead bit elimination to exploit
524
// opportunities opened up by them.
525
FPM.addPass(InstCombinePass());
526
invokePeepholeEPCallbacks(FPM, Level);
527
528
FPM.addPass(CoroElidePass());
529
530
invokeScalarOptimizerLateEPCallbacks(FPM, Level);
531
532
// Finally, do an expensive DCE pass to catch all the dead code exposed by
533
// the simplifications and basic cleanup after all the simplifications.
534
// TODO: Investigate if this is too expensive.
535
FPM.addPass(ADCEPass());
536
FPM.addPass(
537
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
538
FPM.addPass(InstCombinePass());
539
invokePeepholeEPCallbacks(FPM, Level);
540
541
return FPM;
542
}
543
544
/// Assembles the function simplification pipeline for \p Level.
///
/// \p Level must not be O0. A speedup level of 1 is delegated to
/// buildO1FunctionSimplificationPipeline; every other level gets the pipeline
/// built below.
///
/// \param Level Optimization level; selects size-sensitive passes, loop
///              header duplication, and non-trivial unswitching.
/// \param Phase LTO phase; affects loop rotation and whether full unrolling
///              is scheduled.
/// \returns the populated function pass manager.
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
                                                 ThinOrFullLTOPhase Phase) {
  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");

  // The O1 pipeline has a separate pipeline creation function to simplify
  // construction readability.
  if (Level.getSpeedupLevel() == 1)
    return buildO1FunctionSimplificationPipeline(Level, Phase);

  // All but the last SimplifyCFG run in this pipeline share these options.
  const auto MakeSimplifyCFG = [] {
    return SimplifyCFGPass(
        SimplifyCFGOptions().convertSwitchRangeToICmp(true));
  };
  // The LICM runs take their caps from the tuning options and differ only in
  // whether speculation is allowed.
  const auto MakeLICM = [this](bool AllowSpeculation) {
    return LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                    AllowSpeculation);
  };

  FunctionPassManager Pipeline;

  if (AreStatisticsEnabled())
    Pipeline.addPass(CountVisitsPass());

  // Break aggregates apart into scalars and form SSA out of the resulting
  // local memory accesses.
  Pipeline.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Remove trivial redundancies.
  Pipeline.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
  if (EnableKnowledgeRetention)
    Pipeline.addPass(AssumeSimplifyPass());

  // Hoisting of scalars and load expressions.
  if (EnableGVNHoist)
    Pipeline.addPass(GVNHoistPass());

  // Global value numbering based sinking.
  if (EnableGVNSink) {
    Pipeline.addPass(GVNSinkPass());
    Pipeline.addPass(MakeSimplifyCFG());
  }

  // Speculative execution if the target has divergent branches; otherwise nop.
  Pipeline.addPass(
      SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));

  // Optimize based on known information about branches, and cleanup afterward.
  Pipeline.addPass(JumpThreadingPass());
  Pipeline.addPass(CorrelatedValuePropagationPass());

  // Jump table to switch conversion.
  if (EnableJumpTableToSwitch)
    Pipeline.addPass(JumpTableToSwitchPass());

  Pipeline.addPass(MakeSimplifyCFG());
  Pipeline.addPass(InstCombinePass());
  Pipeline.addPass(AggressiveInstCombinePass());

  if (!Level.isOptimizingForSize())
    Pipeline.addPass(LibCallsShrinkWrapPass());

  invokePeepholeEPCallbacks(Pipeline, Level);

  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
  // using the size value profile. Don't perform this when optimizing for size.
  const bool UseMemOPSizeOpt = PGOOpt &&
                               PGOOpt->Action == PGOOptions::IRUse &&
                               !Level.isOptimizingForSize();
  if (UseMemOPSizeOpt)
    Pipeline.addPass(PGOMemOPSizeOpt());

  Pipeline.addPass(TailCallElimPass());
  Pipeline.addPass(MakeSimplifyCFG());

  // Form canonically associated expression trees, and simplify the trees using
  // basic mathematical properties. For example, this will form (nearly)
  // minimal multiplication trees.
  Pipeline.addPass(ReassociatePass());

  if (EnableConstraintElimination)
    Pipeline.addPass(ConstraintEliminationPass());

  // Add the primary loop simplification pipeline.
  // FIXME: Currently this is split into two loop pass pipelines because we run
  // some function passes in between them. These can and should be removed
  // and/or replaced by scheduling the loop pass equivalents in the correct
  // positions. But those equivalent passes aren't powerful enough yet.
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass`, which isn't powerful enough yet to
  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
  // `LoopInstSimplify`.
  LoopPassManager EarlyLoops, LateLoops;

  // Simplify the loop body. We do this initially to clean up after other loop
  // passes run, either when iterating on a loop or on inner loops with
  // implications on the outer loop.
  EarlyLoops.addPass(LoopInstSimplifyPass());
  EarlyLoops.addPass(LoopSimplifyCFGPass());

  // Try to remove as much code from the loop header as possible, to reduce
  // the amount of IR that will have to be duplicated. However, do not perform
  // speculative hoisting the first time, as LICM will destroy metadata that
  // may not need to be destroyed if run after loop rotation.
  // TODO: Investigate promotion cap.
  EarlyLoops.addPass(MakeLICM(/*AllowSpeculation=*/false));

  // Disable header duplication in loop rotation at -Oz, unless the
  // enable-loop-header-duplication flag asks for it.
  const bool AllowHeaderDuplication =
      EnableLoopHeaderDuplication || Level != OptimizationLevel::Oz;
  EarlyLoops.addPass(
      LoopRotatePass(AllowHeaderDuplication, isLTOPreLink(Phase)));
  // TODO: Investigate promotion cap.
  EarlyLoops.addPass(MakeLICM(/*AllowSpeculation=*/true));

  // Non-trivial unswitching is only requested at O3.
  const bool NonTrivialUnswitch = Level == OptimizationLevel::O3;
  EarlyLoops.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ NonTrivialUnswitch));
  if (EnableLoopFlatten)
    EarlyLoops.addPass(LoopFlattenPass());

  LateLoops.addPass(LoopIdiomRecognizePass());
  LateLoops.addPass(IndVarSimplifyPass());

  {
    ExtraSimpleLoopUnswitchPassManager ExtraUnswitch;
    ExtraUnswitch.addPass(
        SimpleLoopUnswitchPass(/* NonTrivial */ NonTrivialUnswitch));
    LateLoops.addPass(std::move(ExtraUnswitch));
  }

  invokeLateLoopOptimizationsEPCallbacks(LateLoops, Level);

  LateLoops.addPass(LoopDeletionPass());

  if (EnableLoopInterchange)
    LateLoops.addPass(LoopInterchangePass());

  // Do not enable unrolling in the ThinLTO pre-link phase during sample PGO,
  // because it changes the IR in a way that makes profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full unroll attributes, so we need to make sure the full unroll
  // pass is allowed to pay attention to them.
  const bool IsSamplePGOThinPreLink =
      Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
      PGOOpt->Action == PGOOptions::SampleUse;
  if (!IsSamplePGOThinPreLink)
    LateLoops.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                         /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                         PTO.ForgetAllSCEVInLoopUnroll));

  invokeLoopOptimizerEndEPCallbacks(LateLoops, Level);

  Pipeline.addPass(
      createFunctionToLoopPassAdaptor(std::move(EarlyLoops),
                                      /*UseMemorySSA=*/true,
                                      /*UseBlockFrequencyInfo=*/true));
  Pipeline.addPass(MakeSimplifyCFG());
  Pipeline.addPass(InstCombinePass());
  // The second loop pipeline contains passes (LoopIdiomRecognizePass,
  // IndVarSimplifyPass, LoopDeletionPass and LoopFullUnrollPass) that do not
  // preserve MemorySSA. *All* loop passes must preserve it, in order to be
  // able to use it.
  Pipeline.addPass(
      createFunctionToLoopPassAdaptor(std::move(LateLoops),
                                      /*UseMemorySSA=*/false,
                                      /*UseBlockFrequencyInfo=*/false));

  // Delete small arrays after loop unrolling.
  Pipeline.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Try vectorization/scalarization transforms that are both improvements
  // themselves and can allow further folds with GVN and InstCombine.
  Pipeline.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));

  // Eliminate redundancies.
  Pipeline.addPass(MergedLoadStoreMotionPass());
  if (RunNewGVN)
    Pipeline.addPass(NewGVNPass());
  else
    Pipeline.addPass(GVNPass());

  // Sparse conditional constant propagation.
  // FIXME: It isn't clear why we do this *after* loop passes rather than
  // before...
  Pipeline.addPass(SCCPPass());

  // Delete dead bit computations (instcombine runs after to fold away the dead
  // computations, and then ADCE will run later to exploit any new DCE
  // opportunities that creates).
  Pipeline.addPass(BDCEPass());

  // Run instcombine after redundancy and dead bit elimination to exploit
  // opportunities opened up by them.
  Pipeline.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(Pipeline, Level);

  // Re-consider control flow based optimizations after redundancy elimination,
  // redo DCE, etc.
  if (EnableDFAJumpThreading)
    Pipeline.addPass(DFAJumpThreadingPass());

  Pipeline.addPass(JumpThreadingPass());
  Pipeline.addPass(CorrelatedValuePropagationPass());

  // Finally, do an expensive DCE pass to catch all the dead code exposed by
  // the simplifications and basic cleanup after all the simplifications.
  // TODO: Investigate if this is too expensive.
  Pipeline.addPass(ADCEPass());

  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
  Pipeline.addPass(MemCpyOptPass());

  Pipeline.addPass(DSEPass());
  Pipeline.addPass(MoveAutoInitPass());

  Pipeline.addPass(createFunctionToLoopPassAdaptor(
      MakeLICM(/*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));

  Pipeline.addPass(CoroElidePass());

  invokeScalarOptimizerLateEPCallbacks(Pipeline, Level);

  // The closing SimplifyCFG run additionally hoists and sinks common
  // instructions.
  Pipeline.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                       .convertSwitchRangeToICmp(true)
                                       .hoistCommonInsts(true)
                                       .sinkCommonInsts(true)));
  Pipeline.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(Pipeline, Level);

  return Pipeline;
}
765
766
/// Append to \p MPM the two module passes this helper always schedules:
/// CanonicalizeAliasesPass followed by NameAnonGlobalPass.
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  // Passes execute in the order in which they are added.
  MPM.addPass(CanonicalizeAliasesPass());
  MPM.addPass(NameAnonGlobalPass());
}
770
771
void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
772
OptimizationLevel Level,
773
ThinOrFullLTOPhase LTOPhase) {
774
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
775
if (DisablePreInliner)
776
return;
777
InlineParams IP;
778
779
IP.DefaultThreshold = PreInlineThreshold;
780
781
// FIXME: The hint threshold has the same value used by the regular inliner
782
// when not optimzing for size. This should probably be lowered after
783
// performance testing.
784
// FIXME: this comment is cargo culted from the old pass manager, revisit).
785
IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
786
ModuleInlinerWrapperPass MIWP(
787
IP, /* MandatoryFirst */ true,
788
InlineContext{LTOPhase, InlinePass::EarlyInliner});
789
CGSCCPassManager &CGPipeline = MIWP.getPM();
790
791
FunctionPassManager FPM;
792
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
793
FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
794
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
795
true))); // Merge & remove basic blocks.
796
FPM.addPass(InstCombinePass()); // Combine silly sequences.
797
invokePeepholeEPCallbacks(FPM, Level);
798
799
CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
800
std::move(FPM), PTO.EagerlyInvalidateAnalyses));
801
802
MPM.addPass(std::move(MIWP));
803
804
// Delete anything that is now dead to make sure that we don't instrument
805
// dead code. Instrumentation can end up keeping dead code around and
806
// dramatically increase code size.
807
MPM.addPass(GlobalDCEPass());
808
}
809
810
void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
811
OptimizationLevel Level) {
812
if (EnablePostPGOLoopRotation) {
813
// Disable header duplication in loop rotation at -Oz.
814
MPM.addPass(createModuleToFunctionPassAdaptor(
815
createFunctionToLoopPassAdaptor(
816
LoopRotatePass(EnableLoopHeaderDuplication ||
817
Level != OptimizationLevel::Oz),
818
/*UseMemorySSA=*/false,
819
/*UseBlockFrequencyInfo=*/false),
820
PTO.EagerlyInvalidateAnalyses));
821
}
822
}
823
824
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
825
OptimizationLevel Level, bool RunProfileGen,
826
bool IsCS, bool AtomicCounterUpdate,
827
std::string ProfileFile,
828
std::string ProfileRemappingFile,
829
IntrusiveRefCntPtr<vfs::FileSystem> FS) {
830
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
831
832
if (!RunProfileGen) {
833
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
834
MPM.addPass(
835
PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
836
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
837
// RequireAnalysisPass for PSI before subsequent non-module passes.
838
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
839
return;
840
}
841
842
// Perform PGO instrumentation.
843
MPM.addPass(PGOInstrumentationGen(IsCS));
844
845
addPostPGOLoopRotation(MPM, Level);
846
// Add the profile lowering pass.
847
InstrProfOptions Options;
848
if (!ProfileFile.empty())
849
Options.InstrProfileOutput = ProfileFile;
850
// Do counter promotion at Level greater than O0.
851
Options.DoCounterPromotion = true;
852
Options.UseBFIInPromotion = IsCS;
853
if (EnableSampledInstr) {
854
Options.Sampling = true;
855
// With sampling, there is little beneifit to enable counter promotion.
856
// But note that sampling does work with counter promotion.
857
Options.DoCounterPromotion = false;
858
}
859
Options.Atomic = AtomicCounterUpdate;
860
MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
861
}
862
863
void PassBuilder::addPGOInstrPassesForO0(
864
ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
865
bool AtomicCounterUpdate, std::string ProfileFile,
866
std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
867
if (!RunProfileGen) {
868
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
869
MPM.addPass(
870
PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
871
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
872
// RequireAnalysisPass for PSI before subsequent non-module passes.
873
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
874
return;
875
}
876
877
// Perform PGO instrumentation.
878
MPM.addPass(PGOInstrumentationGen(IsCS));
879
// Add the profile lowering pass.
880
InstrProfOptions Options;
881
if (!ProfileFile.empty())
882
Options.InstrProfileOutput = ProfileFile;
883
// Do not do counter promotion at O0.
884
Options.DoCounterPromotion = false;
885
Options.UseBFIInPromotion = IsCS;
886
Options.Atomic = AtomicCounterUpdate;
887
MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
888
}
889
890
/// Translate \p Level into inliner parameters by forwarding its speedup and
/// size levels to getInlineParams.
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
  const auto SpeedupLevel = Level.getSpeedupLevel();
  const auto SizeLevel = Level.getSizeLevel();
  return getInlineParams(SpeedupLevel, SizeLevel);
}
893
894
/// Build the CGSCC inliner pipeline for \p Level and \p Phase.
///
/// The returned ModuleInlinerWrapperPass carries:
///  - module passes that run first (optionally GlobalsAA plus an AAManager
///    invalidation, then ProfileSummaryAnalysis),
///  - the main CGSCC pipeline (attribute deduction, optional argument
///    promotion and OpenMP optimization, the function simplification pipeline,
///    coroutine splitting),
///  - a late module pass that invalidates ShouldNotRunFunctionPassesAnalysis.
///
/// \param Level Optimization level used to select inline parameters and
///              level-dependent passes.
/// \param Phase LTO phase; recorded in the InlineContext and forwarded to the
///              function simplification pipeline.
ModuleInlinerWrapperPass
PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
                                  ThinOrFullLTOPhase Phase) {
  // An InlinerThreshold of -1 means "derive the parameters from the
  // optimization level"; any other value is used as the threshold.
  InlineParams Params = PTO.InlinerThreshold == -1
                            ? getInlineParamsFromOptLevel(Level)
                            : getInlineParams(PTO.InlinerThreshold);

  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
  // disable hot callsite inline (as much as possible [1]) because it makes
  // profile annotation in the backend inaccurate.
  //
  // [1] Note the cost of a function could be below zero due to erased
  // prologue / epilogue.
  const bool IsSampleUseInThinLTOPreLink =
      Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
      PGOOpt->Action == PGOOptions::SampleUse;
  if (IsSampleUseInThinLTOPreLink)
    Params.HotCallSiteThreshold = 0;

  if (PGOOpt)
    Params.EnableDeferral = EnablePGOInlineDeferral;

  ModuleInlinerWrapperPass Wrapper(
      Params, PerformMandatoryInliningsFirst,
      InlineContext{Phase, InlinePass::CGSCCInliner}, UseInlineAdvisor,
      MaxDevirtIterations);

  if (EnableGlobalAnalyses) {
    // Require the GlobalsAA analysis for the module so we can query it within
    // the CGSCC pipeline.
    Wrapper.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
    // Invalidate AAManager so it can be recreated and pick up the newly
    // available GlobalsAA.
    Wrapper.addModulePass(
        createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
  }

  // Require the ProfileSummaryAnalysis for the module so we can query it within
  // the inliner pass.
  Wrapper.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());

  // Now begin the main postorder CGSCC pipeline.
  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
  // manager and trying to emulate its precise behavior. Much of this doesn't
  // make a lot of sense and we should revisit the core CGSCC structure.
  CGSCCPassManager &CGPM = Wrapper.getPM();

  // Note: historically, the PruneEH pass was run first to deduce nounwind and
  // generally clean up exception handling overhead. It isn't clear this is
  // valuable as the inliner doesn't currently care whether it is inlining an
  // invoke or a call.

  if (AttributorRun & AttributorRunOption::CGSCC)
    CGPM.addPass(AttributorCGSCCPass());

  // Deduce function attributes. We do another run of this after the function
  // simplification pipeline, so this only needs to run when it could affect the
  // function simplification pipeline, which is only the case with recursive
  // functions.
  CGPM.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));

  // When at O3 add argument promotion to the pass pipeline.
  // FIXME: It isn't at all clear why this should be limited to O3.
  if (Level == OptimizationLevel::O3)
    CGPM.addPass(ArgumentPromotionPass());

  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
  // there are no OpenMP runtime calls present in the module.
  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
    CGPM.addPass(OpenMPOptCGSCCPass());

  invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);

  // Add the core function simplification pipeline nested inside the
  // CGSCC walk.
  CGPM.addPass(createCGSCCToFunctionPassAdaptor(
      buildFunctionSimplificationPipeline(Level, Phase),
      PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));

  // Finally, deduce any function attributes based on the fully simplified
  // function.
  CGPM.addPass(PostOrderFunctionAttrsPass());

  // Mark that the function is fully simplified and that it shouldn't be
  // simplified again if we somehow revisit it due to CGSCC mutations unless
  // it's been modified since.
  CGPM.addPass(createCGSCCToFunctionPassAdaptor(
      RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));

  CGPM.addPass(CoroSplitPass(Level != OptimizationLevel::O0));

  // Make sure we don't affect potential future NoRerun CGSCC adaptors.
  Wrapper.addLateModulePass(createModuleToFunctionPassAdaptor(
      InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));

  return Wrapper;
}
989
990
/// Build the module-inliner variant of the inlining pipeline.
///
/// The returned pass manager runs, in order: the ModuleInlinerPass, the
/// function simplification pipeline over every function, and CoroSplitPass in
/// a post-order CGSCC walk.
///
/// \param Level Optimization level used to derive the inline parameters and
///              forwarded to the function simplification pipeline.
/// \param Phase LTO phase; forwarded to the inliner and to the function
///              simplification pipeline.
ModulePassManager
PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
                                        ThinOrFullLTOPhase Phase) {
  ModulePassManager MPM;

  InlineParams IP = getInlineParamsFromOptLevel(Level);
  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
  // disable hot callsite inline (as much as possible [1]) because it makes
  // profile annotation in the backend inaccurate.
  //
  // [1] Note the cost of a function could be below zero due to erased
  // prologue / epilogue.
  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
      PGOOpt->Action == PGOOptions::SampleUse)
    IP.HotCallSiteThreshold = 0;

  // The inline deferral logic is used to avoid losing some
  // inlining chance in future. It is helpful in SCC inliner, in which
  // inlining is processed in bottom-up order.
  // While in module inliner, the inlining order is a priority-based order
  // by default. The inline deferral is unnecessary there. So we disable the
  // inline deferral logic in module inliner.
  //
  // Deferral is therefore switched off unconditionally, including for PGO
  // builds; no PGO-specific value is assigned first because it would be
  // overwritten here before anything could read it.
  IP.EnableDeferral = false;

  MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));

  MPM.addPass(createModuleToFunctionPassAdaptor(
      buildFunctionSimplificationPipeline(Level, Phase),
      PTO.EagerlyInvalidateAnalyses));

  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
      CoroSplitPass(Level != OptimizationLevel::O0)));

  return MPM;
}
1028
1029
/// Build the module-level simplification pipeline.
///
/// In order, the returned pass manager contains: optional pseudo-probe
/// insertion, early indirect call promotion for the ThinLTO backend, early
/// frontend cleanup (skipped for ThinLTO post-link), sample profile loading,
/// OpenMP/Attributor passes, IPSCCP and global optimization with a small
/// function cleanup pipeline, instrumentation-PGO related passes, the inliner
/// pipeline, and a final round of global cleanup.
///
/// \param Level Optimization level; must not be O0 (asserted).
/// \param Phase LTO phase; must not be FullLTOPostLink (asserted).
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
                                               ThinOrFullLTOPhase Phase) {
  assert(Level != OptimizationLevel::O0 &&
         "Should not be used for O0 pipeline");

  assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
         "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");

  // Several decisions below hinge on whether this is the ThinLTO backend.
  const bool IsThinLTOPostLink = Phase == ThinOrFullLTOPhase::ThinLTOPostLink;

  ModulePassManager MPM;

  // Place pseudo probe instrumentation as the first pass of the pipeline to
  // minimize the impact of optimization changes.
  if (PGOOpt && PGOOpt->PseudoProbeForProfiling && !IsThinLTOPostLink)
    MPM.addPass(SampleProfileProbePass(TM));

  const bool HasSampleProfile =
      PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);

  // In ThinLTO mode, when flattened profile is used, all the available
  // profile information will be annotated in PreLink phase so there is
  // no need to load the profile again in PostLink.
  const bool ProfileAlreadyAnnotated =
      FlattenedProfileUsed && IsThinLTOPostLink;
  const bool LoadSampleProfile = HasSampleProfile && !ProfileAlreadyAnnotated;

  // During the ThinLTO backend phase we perform early indirect call promotion
  // here, before globalopt. Otherwise imported available_externally functions
  // look unreferenced and are removed. If we are going to load the sample
  // profile then defer until later.
  // TODO: See if we can move later and consolidate with the location where
  // we perform ICP when we are loading a sample profile.
  // TODO: We pass HasSampleProfile (whether there was a sample profile file
  // passed to the compile) to the SamplePGO flag of ICP. This is used to
  // determine whether the new direct calls are annotated with prof metadata.
  // Ideally this should be determined from whether the IR is annotated with
  // sample profile, and not whether a sample profile was provided on the
  // command line. E.g. for flattened profiles where we will not be reloading
  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
  // provide the sample profile file.
  if (IsThinLTOPostLink && !LoadSampleProfile)
    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));

  // Create an early function pass manager to cleanup the output of the
  // frontend. Not necessary with LTO post link pipelines since the pre link
  // pipeline already cleaned up the frontend output.
  if (!IsThinLTOPostLink) {
    // Do basic inference of function attributes from known properties of system
    // libraries and other oracles.
    MPM.addPass(InferFunctionAttrsPass());
    MPM.addPass(CoroEarlyPass());

    FunctionPassManager FrontendCleanupFPM;
    FrontendCleanupFPM.addPass(
        EntryExitInstrumenterPass(/*PostInlining=*/false));
    // Lower llvm.expect to metadata before attempting transforms.
    // Compare/branch metadata may alter the behavior of passes like
    // SimplifyCFG.
    FrontendCleanupFPM.addPass(LowerExpectIntrinsicPass());
    FrontendCleanupFPM.addPass(SimplifyCFGPass());
    FrontendCleanupFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
    FrontendCleanupFPM.addPass(EarlyCSEPass());
    if (Level == OptimizationLevel::O3)
      FrontendCleanupFPM.addPass(CallSiteSplittingPass());
    MPM.addPass(createModuleToFunctionPassAdaptor(
        std::move(FrontendCleanupFPM), PTO.EagerlyInvalidateAnalyses));
  }

  if (LoadSampleProfile) {
    // Annotate sample profile right after early FPM to ensure freshness of
    // the debug info.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile, Phase));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
    // Do not invoke ICP in the LTOPrelink phase as it makes it hard
    // for the profile annotation to be accurate in the LTO backend.
    if (!isLTOPreLink(Phase)) {
      // We perform early indirect call promotion here, before globalopt.
      // This is important for the ThinLTO backend phase because otherwise
      // imported available_externally functions look unreferenced and are
      // removed.
      MPM.addPass(
          PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
    }
  }

  // Try to perform OpenMP specific optimizations on the module. This is a
  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
  MPM.addPass(OpenMPOptPass());

  if (AttributorRun & AttributorRunOption::MODULE)
    MPM.addPass(AttributorPass());

  // Lower type metadata and the type.test intrinsic in the ThinLTO
  // post link pipeline after ICP. This is to enable usage of the type
  // tests in ICP sequences.
  if (IsThinLTOPostLink)
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

  invokePipelineEarlySimplificationEPCallbacks(MPM, Level);

  // Interprocedural constant propagation now that basic cleanup has occurred
  // and prior to optimizing globals.
  // FIXME: This position in the pipeline hasn't been carefully considered in
  // years, it should be re-analyzed.
  const bool AllowFuncSpec = Level != OptimizationLevel::Os &&
                             Level != OptimizationLevel::Oz &&
                             !isLTOPreLink(Phase);
  MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/AllowFuncSpec)));

  // Attach metadata to indirect call sites indicating the set of functions
  // they may target at run-time. This should follow IPSCCP.
  MPM.addPass(CalledValuePropagationPass());

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Create a small function pass pipeline to cleanup after all the global
  // optimizations.
  FunctionPassManager PostGlobalOptFPM;
  // FIXME: Should this instead be a run of SROA?
  PostGlobalOptFPM.addPass(PromotePass());
  PostGlobalOptFPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(PostGlobalOptFPM, Level);
  PostGlobalOptFPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PostGlobalOptFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // We already asserted this happens in non-FullLTOPostLink earlier, so
  // anything that is not the ThinLTO backend counts as pre-link here.
  const bool IsPreLink = !IsThinLTOPostLink;
  const bool IsPGOPreLink = PGOOpt && IsPreLink;
  const bool IsPGOInstrGen =
      IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
  const bool IsPGOInstrUse =
      IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
  const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
  // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
  // enable ctx profiling from the frontend.
  assert(
      !(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) &&
      "Enabling both instrumented FDO and contextual instrumentation is not "
      "supported.");
  // Enable contextual profiling instrumentation.
  const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
                            PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
  // Either flavour of (non context-sensitive) IR instrumentation PGO.
  const bool IsIRPGO = IsPGOInstrGen || IsPGOInstrUse;

  if (IsIRPGO || IsMemprofUse || IsCtxProfGen)
    addPreInlinerPasses(MPM, Level, Phase);

  // Add all the requested passes for instrumentation PGO, if requested.
  if (IsIRPGO) {
    addPGOInstrPasses(MPM, Level,
                      /*RunProfileGen=*/IsPGOInstrGen,
                      /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
                      PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
                      PGOOpt->FS);
  } else if (IsCtxProfGen) {
    MPM.addPass(PGOInstrumentationGen(false));
    addPostPGOLoopRotation(MPM, Level);
    MPM.addPass(PGOCtxProfLoweringPass());
  }

  if (IsIRPGO || IsCtxProfGen)
    MPM.addPass(PGOIndirectCallPromotion(false, false));

  if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
                                               EnableSampledInstr));

  if (IsMemprofUse)
    MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));

  // Synthesize function entry counts for non-PGO compilation.
  if (EnableSyntheticCounts && !PGOOpt)
    MPM.addPass(SyntheticCountsPropagation());

  if (EnablePGOForceFunctionAttrs && PGOOpt)
    MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));

  MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));

  // The two inliner pipelines have different types, so they are added in
  // separate branches.
  if (EnableModuleInliner)
    MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
  else
    MPM.addPass(buildInlinerPipeline(Level, Phase));

  // Remove any dead arguments exposed by cleanups, constant folding globals,
  // and argument promotion.
  MPM.addPass(DeadArgumentEliminationPass());

  MPM.addPass(CoroCleanupPass());

  // Optimize globals now that functions are fully simplified.
  MPM.addPass(GlobalOptPass());
  MPM.addPass(GlobalDCEPass());

  return MPM;
}
1229
1230
/// TODO: Should LTO cause any differences to this set of passes?
1231
void PassBuilder::addVectorPasses(OptimizationLevel Level,
1232
FunctionPassManager &FPM, bool IsFullLTO) {
1233
FPM.addPass(LoopVectorizePass(
1234
LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1235
1236
if (EnableInferAlignmentPass)
1237
FPM.addPass(InferAlignmentPass());
1238
if (IsFullLTO) {
1239
// The vectorizer may have significantly shortened a loop body; unroll
1240
// again. Unroll small loops to hide loop backedge latency and saturate any
1241
// parallel execution resources of an out-of-order processor. We also then
1242
// need to clean up redundancies and loop invariant code.
1243
// FIXME: It would be really good to use a loop-integrated instruction
1244
// combiner for cleanup here so that the unrolling and LICM can be pipelined
1245
// across the loop nests.
1246
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1247
if (EnableUnrollAndJam && PTO.LoopUnrolling)
1248
FPM.addPass(createFunctionToLoopPassAdaptor(
1249
LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1250
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1251
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1252
PTO.ForgetAllSCEVInLoopUnroll)));
1253
FPM.addPass(WarnMissedTransformationsPass());
1254
// Now that we are done with loop unrolling, be it either by LoopVectorizer,
1255
// or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1256
// become constant-offset, thus enabling SROA and alloca promotion. Do so.
1257
// NOTE: we are very late in the pipeline, and we don't have any LICM
1258
// or SimplifyCFG passes scheduled after us, that would cleanup
1259
// the CFG mess this may created if allowed to modify CFG, so forbid that.
1260
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1261
}
1262
1263
if (!IsFullLTO) {
1264
// Eliminate loads by forwarding stores from the previous iteration to loads
1265
// of the current iteration.
1266
FPM.addPass(LoopLoadEliminationPass());
1267
}
1268
// Cleanup after the loop optimization passes.
1269
FPM.addPass(InstCombinePass());
1270
1271
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1272
ExtraVectorPassManager ExtraPasses;
1273
// At higher optimization levels, try to clean up any runtime overlap and
1274
// alignment checks inserted by the vectorizer. We want to track correlated
1275
// runtime checks for two inner loops in the same outer loop, fold any
1276
// common computations, hoist loop-invariant aspects out of any outer loop,
1277
// and unswitch the runtime checks if possible. Once hoisted, we may have
1278
// dead (or speculatable) control flows or more combining opportunities.
1279
ExtraPasses.addPass(EarlyCSEPass());
1280
ExtraPasses.addPass(CorrelatedValuePropagationPass());
1281
ExtraPasses.addPass(InstCombinePass());
1282
LoopPassManager LPM;
1283
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1284
/*AllowSpeculation=*/true));
1285
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1286
OptimizationLevel::O3));
1287
ExtraPasses.addPass(
1288
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1289
/*UseBlockFrequencyInfo=*/true));
1290
ExtraPasses.addPass(
1291
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1292
ExtraPasses.addPass(InstCombinePass());
1293
FPM.addPass(std::move(ExtraPasses));
1294
}
1295
1296
// Now that we've formed fast to execute loop structures, we do further
1297
// optimizations. These are run afterward as they might block doing complex
1298
// analyses and transforms such as what are needed for loop vectorization.
1299
1300
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
1301
// GVN, loop transforms, and others have already run, so it's now better to
1302
// convert to more optimized IR using more aggressive simplify CFG options.
1303
// The extra sinking transform can create larger basic blocks, so do this
1304
// before SLP vectorization.
1305
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1306
.forwardSwitchCondToPhi(true)
1307
.convertSwitchRangeToICmp(true)
1308
.convertSwitchToLookupTable(true)
1309
.needCanonicalLoops(false)
1310
.hoistCommonInsts(true)
1311
.sinkCommonInsts(true)));
1312
1313
if (IsFullLTO) {
1314
FPM.addPass(SCCPPass());
1315
FPM.addPass(InstCombinePass());
1316
FPM.addPass(BDCEPass());
1317
}
1318
1319
// Optimize parallel scalar instruction chains into SIMD instructions.
1320
if (PTO.SLPVectorization) {
1321
FPM.addPass(SLPVectorizerPass());
1322
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1323
FPM.addPass(EarlyCSEPass());
1324
}
1325
}
1326
// Enhance/cleanup vector code.
1327
FPM.addPass(VectorCombinePass());
1328
1329
if (!IsFullLTO) {
1330
FPM.addPass(InstCombinePass());
1331
// Unroll small loops to hide loop backedge latency and saturate any
1332
// parallel execution resources of an out-of-order processor. We also then
1333
// need to clean up redundancies and loop invariant code.
1334
// FIXME: It would be really good to use a loop-integrated instruction
1335
// combiner for cleanup here so that the unrolling and LICM can be pipelined
1336
// across the loop nests.
1337
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1338
if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1339
FPM.addPass(createFunctionToLoopPassAdaptor(
1340
LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1341
}
1342
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1343
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1344
PTO.ForgetAllSCEVInLoopUnroll)));
1345
FPM.addPass(WarnMissedTransformationsPass());
1346
// Now that we are done with loop unrolling, be it either by LoopVectorizer,
1347
// or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1348
// become constant-offset, thus enabling SROA and alloca promotion. Do so.
1349
// NOTE: we are very late in the pipeline, and we don't have any LICM
1350
// or SimplifyCFG passes scheduled after us, that would cleanup
1351
// the CFG mess this may created if allowed to modify CFG, so forbid that.
1352
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1353
}
1354
1355
if (EnableInferAlignmentPass)
1356
FPM.addPass(InferAlignmentPass());
1357
FPM.addPass(InstCombinePass());
1358
1359
// This is needed for two reasons:
1360
// 1. It works around problems that instcombine introduces, such as sinking
1361
// expensive FP divides into loops containing multiplications using the
1362
// divide result.
1363
// 2. It helps to clean up some loop-invariant code created by the loop
1364
// unroll pass when IsFullLTO=false.
1365
FPM.addPass(createFunctionToLoopPassAdaptor(
1366
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1367
/*AllowSpeculation=*/true),
1368
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1369
1370
// Now that we've vectorized and unrolled loops, we may have more refined
1371
// alignment information, try to re-derive it here.
1372
FPM.addPass(AlignmentFromAssumptionsPass());
1373
}
1374
1375
ModulePassManager
1376
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1377
ThinOrFullLTOPhase LTOPhase) {
1378
const bool LTOPreLink = isLTOPreLink(LTOPhase);
1379
ModulePassManager MPM;
1380
1381
// Run partial inlining pass to partially inline functions that have
1382
// large bodies.
1383
if (RunPartialInlining)
1384
MPM.addPass(PartialInlinerPass());
1385
1386
// Remove avail extern fns and globals definitions since we aren't compiling
1387
// an object file for later LTO. For LTO we want to preserve these so they
1388
// are eligible for inlining at link-time. Note if they are unreferenced they
1389
// will be removed by GlobalDCE later, so this only impacts referenced
1390
// available externally globals. Eventually they will be suppressed during
1391
// codegen, but eliminating here enables more opportunity for GlobalDCE as it
1392
// may make globals referenced by available external functions dead and saves
1393
// running remaining passes on the eliminated functions. These should be
1394
// preserved during prelinking for link-time inlining decisions.
1395
if (!LTOPreLink)
1396
MPM.addPass(EliminateAvailableExternallyPass());
1397
1398
if (EnableOrderFileInstrumentation)
1399
MPM.addPass(InstrOrderFilePass());
1400
1401
// Do RPO function attribute inference across the module to forward-propagate
1402
// attributes where applicable.
1403
// FIXME: Is this really an optimization rather than a canonicalization?
1404
MPM.addPass(ReversePostOrderFunctionAttrsPass());
1405
1406
// Do a post inline PGO instrumentation and use pass. This is a context
1407
// sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1408
// cross-module inline has not been done yet. The context sensitive
1409
// instrumentation is after all the inlines are done.
1410
if (!LTOPreLink && PGOOpt) {
1411
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1412
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1413
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1414
PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1415
PGOOpt->FS);
1416
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1417
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1418
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1419
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1420
PGOOpt->FS);
1421
}
1422
1423
// Re-compute GlobalsAA here prior to function passes. This is particularly
1424
// useful as the above will have inlined, DCE'ed, and function-attr
1425
// propagated everything. We should at this point have a reasonably minimal
1426
// and richly annotated call graph. By computing aliasing and mod/ref
1427
// information for all local globals here, the late loop passes and notably
1428
// the vectorizer will be able to use them to help recognize vectorizable
1429
// memory operations.
1430
if (EnableGlobalAnalyses)
1431
MPM.addPass(RecomputeGlobalsAAPass());
1432
1433
invokeOptimizerEarlyEPCallbacks(MPM, Level);
1434
1435
FunctionPassManager OptimizePM;
1436
// Scheduling LoopVersioningLICM when inlining is over, because after that
1437
// we may see more accurate aliasing. Reason to run this late is that too
1438
// early versioning may prevent further inlining due to increase of code
1439
// size. Other optimizations which runs later might get benefit of no-alias
1440
// assumption in clone loop.
1441
if (UseLoopVersioningLICM) {
1442
OptimizePM.addPass(
1443
createFunctionToLoopPassAdaptor(LoopVersioningLICMPass()));
1444
// LoopVersioningLICM pass might increase new LICM opportunities.
1445
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1446
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1447
/*AllowSpeculation=*/true),
1448
/*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1449
}
1450
1451
OptimizePM.addPass(Float2IntPass());
1452
OptimizePM.addPass(LowerConstantIntrinsicsPass());
1453
1454
if (EnableMatrix) {
1455
OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1456
OptimizePM.addPass(EarlyCSEPass());
1457
}
1458
1459
// CHR pass should only be applied with the profile information.
1460
// The check is to check the profile summary information in CHR.
1461
if (EnableCHR && Level == OptimizationLevel::O3)
1462
OptimizePM.addPass(ControlHeightReductionPass());
1463
1464
// FIXME: We need to run some loop optimizations to re-rotate loops after
1465
// simplifycfg and others undo their rotation.
1466
1467
// Optimize the loop execution. These passes operate on entire loop nests
1468
// rather than on each loop in an inside-out manner, and so they are actually
1469
// function passes.
1470
1471
invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1472
1473
LoopPassManager LPM;
1474
// First rotate loops that may have been un-rotated by prior passes.
1475
// Disable header duplication at -Oz.
1476
LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
1477
Level != OptimizationLevel::Oz,
1478
LTOPreLink));
1479
// Some loops may have become dead by now. Try to delete them.
1480
// FIXME: see discussion in https://reviews.llvm.org/D112851,
1481
// this may need to be revisited once we run GVN before loop deletion
1482
// in the simplification pipeline.
1483
LPM.addPass(LoopDeletionPass());
1484
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1485
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1486
1487
// Distribute loops to allow partial vectorization. I.e. isolate dependences
1488
// into separate loop that would otherwise inhibit vectorization. This is
1489
// currently only performed for loops marked with the metadata
1490
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1491
OptimizePM.addPass(LoopDistributePass());
1492
1493
// Populates the VFABI attribute with the scalar-to-vector mappings
1494
// from the TargetLibraryInfo.
1495
OptimizePM.addPass(InjectTLIMappings());
1496
1497
addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1498
1499
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
1500
// canonicalization pass that enables other optimizations. As a result,
1501
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1502
// result too early.
1503
OptimizePM.addPass(LoopSinkPass());
1504
1505
// And finally clean up LCSSA form before generating code.
1506
OptimizePM.addPass(InstSimplifyPass());
1507
1508
// This hoists/decomposes div/rem ops. It should run after other sink/hoist
1509
// passes to avoid re-sinking, but before SimplifyCFG because it can allow
1510
// flattening of blocks.
1511
OptimizePM.addPass(DivRemPairsPass());
1512
1513
// Try to annotate calls that were created during optimization.
1514
OptimizePM.addPass(TailCallElimPass());
1515
1516
// LoopSink (and other loop passes since the last simplifyCFG) might have
1517
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1518
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1519
.convertSwitchRangeToICmp(true)
1520
.speculateUnpredictables(true)));
1521
1522
// Add the core optimizing pipeline.
1523
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1524
PTO.EagerlyInvalidateAnalyses));
1525
1526
invokeOptimizerLastEPCallbacks(MPM, Level);
1527
1528
// Split out cold code. Splitting is done late to avoid hiding context from
1529
// other optimizations and inadvertently regressing performance. The tradeoff
1530
// is that this has a higher code size cost than splitting early.
1531
if (EnableHotColdSplit && !LTOPreLink)
1532
MPM.addPass(HotColdSplittingPass());
1533
1534
// Search the code for similar regions of code. If enough similar regions can
1535
// be found where extracting the regions into their own function will decrease
1536
// the size of the program, we extract the regions, and deduplicate the
1537
// structurally similar regions.
1538
if (EnableIROutliner)
1539
MPM.addPass(IROutlinerPass());
1540
1541
// Now we need to do some global optimization transforms.
1542
// FIXME: It would seem like these should come first in the optimization
1543
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1544
// ordering here.
1545
MPM.addPass(GlobalDCEPass());
1546
MPM.addPass(ConstantMergePass());
1547
1548
// Merge functions if requested. It has a better chance to merge functions
1549
// after ConstantMerge folded jump tables.
1550
if (PTO.MergeFunctions)
1551
MPM.addPass(MergeFunctionsPass());
1552
1553
if (PTO.CallGraphProfile && !LTOPreLink)
1554
MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1555
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
1556
1557
// TODO: Relative look table converter pass caused an issue when full lto is
1558
// enabled. See https://reviews.llvm.org/D94355 for more details.
1559
// Until the issue fixed, disable this pass during pre-linking phase.
1560
if (!LTOPreLink)
1561
MPM.addPass(RelLookupTableConverterPass());
1562
1563
return MPM;
1564
}
1565
1566
// Builds the default module pipeline for a single translation unit.
//
// \p Level selects the optimization level; O0 is delegated entirely to
// buildO0DefaultPipeline. \p LTOPreLink indicates that the result feeds a
// full-LTO link, which selects the FullLTOPreLink phase for the
// simplification/optimization sub-pipelines and appends the required
// pre-link passes at the end.
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                           bool LTOPreLink) {
  // O0 has its own dedicated, minimal pipeline.
  if (Level == OptimizationLevel::O0)
    return buildO0DefaultPipeline(Level, LTOPreLink);

  ModulePassManager Pipeline;

  // Turn @llvm.global.annotations into !annotation metadata.
  Pipeline.addPass(Annotation2MetadataPass());

  // Apply forced function attributes up front so every later pass sees them.
  Pipeline.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    Pipeline.addPass(
        createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Run the callbacks registered for the start of the module pipeline.
  invokePipelineStartEPCallbacks(Pipeline, Level);

  // Both sub-pipelines below are parameterized by the same LTO phase.
  ThinOrFullLTOPhase Phase = ThinOrFullLTOPhase::None;
  if (LTOPreLink)
    Phase = ThinOrFullLTOPhase::FullLTOPreLink;

  // Core simplification pipeline, followed by the optimization pipeline.
  Pipeline.addPass(buildModuleSimplificationPipeline(Level, Phase));
  Pipeline.addPass(buildModuleOptimizationPipeline(Level, Phase));

  // Pseudo probes are only updated for sample-use profiling builds.
  const bool UpdatePseudoProbes = PGOOpt && PGOOpt->PseudoProbeForProfiling &&
                                  PGOOpt->Action == PGOOptions::SampleUse;
  if (UpdatePseudoProbes)
    Pipeline.addPass(PseudoProbeUpdatePass());

  // Emit annotation remarks.
  addAnnotationRemarksPass(Pipeline);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(Pipeline);

  return Pipeline;
}
1606
1607
// Builds the pipeline used for "fat" LTO objects: the matching LTO pre-link
// pipeline, then EmbedBitcodePass, then a post-embedding optimization stage.
//
// \p ThinLTO picks the ThinLTO pre-link pipeline over the full-LTO one and is
// forwarded, together with \p EmitSummary, to EmbedBitcodePass.
ModulePassManager
PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
                                        bool EmitSummary) {
  ModulePassManager Pipeline;

  // Pre-link stage matching the requested LTO flavor.
  Pipeline.addPass(ThinLTO ? buildThinLTOPreLinkDefaultPipeline(Level)
                           : buildLTOPreLinkDefaultPipeline(Level));
  Pipeline.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));

  // With ThinLTO and sample profiling, reuse the ThinLTO post-link pipeline
  // (without an import summary).
  const bool UseThinLTOPostLink =
      ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse;
  if (UseThinLTOPostLink) {
    Pipeline.addPass(
        buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
    return Pipeline;
  }

  // Otherwise run the plain module optimization pipeline and emit annotation
  // remarks.
  Pipeline.addPass(
      buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
  addAnnotationRemarksPass(Pipeline);
  return Pipeline;
}
1629
1630
// Builds the per-module pipeline run before the ThinLTO thin link.
//
// O0 is delegated to buildO0DefaultPipeline with LTOPreLink set. For other
// levels only the simplification pipeline is run here; the optimization
// pipeline is left to the post-link stage.
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  if (Level == OptimizationLevel::O0)
    return buildO0DefaultPipeline(Level, /*LTOPreLink*/ true);

  ModulePassManager Pipeline;

  // Turn @llvm.global.annotations into !annotation metadata.
  Pipeline.addPass(Annotation2MetadataPass());

  // Apply forced function attributes up front so every later pass sees them.
  Pipeline.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    Pipeline.addPass(
        createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Run the callbacks registered for the start of the module pipeline.
  invokePipelineStartEPCallbacks(Pipeline, Level);

  // Since ThinLTO runs later, avoid bloating the code with
  // unrolling/vectorization/... at this point and only simplify the module as
  // far as possible.
  Pipeline.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPreLink));

  // Partially inline functions with large bodies.
  // FIXME: It is unclear whether this is the right place for partial inlining
  // in ThinLTO. Another canonicalization and simplification phase runs after
  // the thin link, so running it here works with less information than will be
  // available later and may grow functions in ways that aren't beneficial.
  if (RunPartialInlining)
    Pipeline.addPass(PartialInlinerPass());

  // Pseudo probes are only updated for sample-use profiling builds.
  const bool UpdatePseudoProbes = PGOOpt && PGOOpt->PseudoProbeForProfiling &&
                                  PGOOpt->Action == PGOOptions::SampleUse;
  if (UpdatePseudoProbes)
    Pipeline.addPass(PseudoProbeUpdatePass());

  // Honor the Optimizer{Early,Last}EPCallbacks that clang adds on PreLink.
  // The real optimization happens in the PostLink stage, but clang cannot
  // register callbacks there when ThinLTO runs in-process inside the linker.
  invokeOptimizerEarlyEPCallbacks(Pipeline, Level);
  invokeOptimizerLastEPCallbacks(Pipeline, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(Pipeline);

  addRequiredLTOPreLinkPasses(Pipeline);

  return Pipeline;
}
1682
1683
// Builds the ThinLTO post-link (backend) module pipeline.
//
// \p ImportSummary may be null; when it is non-null the summary-driven passes
// (context disambiguation, whole-program devirtualization, type-test
// lowering) are scheduled first. At O0 only the lowering and dead-global
// cleanup run.
ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
    OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
  ModulePassManager Pipeline;
  const ThinOrFullLTOPhase PostLink = ThinOrFullLTOPhase::ThinLTOPostLink;

  if (ImportSummary != nullptr) {
    // Context disambiguation decisions must be applied early for ThinLTO so
    // callsites can still be matched correctly against the summary data.
    if (EnableMemProfContextDisambiguation)
      Pipeline.addPass(MemProfContextDisambiguation(ImportSummary));

    // The next two passes import type identifier resolutions for
    // whole-program devirtualization and CFI. They have to run early: other
    // passes may disturb the specific instruction patterns they look for,
    // creating dependencies on resolutions that may not appear in the
    // summary.
    //
    // For example, GVN may turn assume(type.test) appearing in two basic
    // blocks into assume(phi(type.test, type.test)), which would change a
    // dependency on a WPD resolution into a dependency on a type identifier
    // resolution for CFI.
    //
    // WPD also has more precise information than ICP and devirtualizes more
    // effectively, so it should see the IR first.
    //
    // WPD and LowerTypeTests must run even at -O0 in order to lower type
    // metadata and intrinsics.
    Pipeline.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
    Pipeline.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
  }

  if (Level == OptimizationLevel::O0) {
    // Second run: clean up type tests that WPD left behind for use in ICP.
    Pipeline.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
    // Drop available_externally and unreferenced globals. ThinLTO needs this
    // to avoid leaving undefined references to dead globals in the object
    // file.
    Pipeline.addPass(EliminateAvailableExternallyPass());
    Pipeline.addPass(GlobalDCEPass());
    return Pipeline;
  }

  // Core simplification pipeline, followed by the optimization pipeline.
  Pipeline.addPass(buildModuleSimplificationPipeline(Level, PostLink));
  Pipeline.addPass(buildModuleOptimizationPipeline(Level, PostLink));

  // Emit annotation remarks.
  addAnnotationRemarksPass(Pipeline);

  return Pipeline;
}
1737
1738
// Builds the full-LTO pre-link pipeline. Currently this is simply the
// per-module default pipeline with its LTOPreLink flag set.
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  // FIXME: We should use a customized pre-link pipeline!
  const bool IsLTOPreLink = true;
  return buildPerModuleDefaultPipeline(Level, IsLTOPreLink);
}
1744
1745
// Builds the full-LTO post-link module pipeline.
//
// \p ExportSummary is forwarded to the whole-program devirtualization and
// type-test lowering passes. The pipeline has two early exits: at O0 only the
// required WPD / type-test lowering is scheduled, and at O1 it stops right
// after whole-program devirtualization. Every exit runs the
// FullLinkTimeOptimizationLast callbacks and emits annotation remarks.
ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                                     ModuleSummaryIndex *ExportSummary) {
  ModulePassManager MPM;
  const ThinOrFullLTOPhase PostLink = ThinOrFullLTOPhase::FullLTOPostLink;
  const auto SpeedupLevel = Level.getSpeedupLevel();

  // Lowers type metadata and the type.test intrinsic, then runs the lowering
  // a second time to clean up any type tests left behind by WPD for use in
  // ICP.
  auto AddTypeTestLowering = [&] {
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
  };

  // Common tail of every exit path: last-EP callbacks, then annotation
  // remarks.
  auto FinishPipeline = [&] {
    invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
    addAnnotationRemarksPass(MPM);
  };

  invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);

  // Create a function that performs CFI checks for cross-DSO calls with
  // targets in the current module.
  MPM.addPass(CrossDSOCFIPass());

  if (Level == OptimizationLevel::O0) {
    // WPD and LowerTypeTests must run even at -O0 in order to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
    AddTypeTestLowering();
    FinishPipeline();
    return MPM;
  }

  const bool IsSampleUse = PGOOpt && PGOOpt->Action == PGOOptions::SampleUse;
  if (IsSampleUse) {
    // Load the sample profile before the LTO optimization pipeline runs.
    MPM.addPass(SampleProfileLoaderPass(
        PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, PostLink));
    // Cache ProfileSummaryAnalysis once so that later non-module passes do
    // not need a RequireAnalysisPass for PSI inserted in front of them.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  }

  // Try OpenMP optimizations; a quick no-op when no OpenMP metadata exists.
  MPM.addPass(OpenMPOptPass(PostLink));

  // Remove unused virtual tables to improve the quality of code generated by
  // whole-program devirtualization and bitset lowering.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  // Basic inference of function attributes from known properties of system
  // libraries and other oracles.
  MPM.addPass(InferFunctionAttrsPass());

  if (SpeedupLevel > 1) {
    MPM.addPass(createModuleToFunctionPassAdaptor(
        CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));

    // Indirect call promotion. This promotes the targets left over by the
    // earlier pass that only promotes intra-module targets. The two-step
    // scheme saves compile time; for LTO the result should equal doing all
    // promotion here.
    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, IsSampleUse));

    // Propagate constants at call sites into the callees. This opens
    // opportunities for globalopt (and inlining) by replacing function
    // pointers passed as arguments with direct uses of functions.
    const bool AllowFuncSpec =
        Level != OptimizationLevel::Os && Level != OptimizationLevel::Oz;
    MPM.addPass(IPSCCPPass(IPSCCPOptions(AllowFuncSpec)));

    // Attach metadata to indirect call sites describing the set of functions
    // they may target at run-time. This should follow IPSCCP.
    MPM.addPass(CalledValuePropagationPass());
  }

  // Deduce function attributes from the current code.
  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // RPO function attribute inference across the module, to forward-propagate
  // attributes where applicable.
  // FIXME: Is this really an optimization rather than a canonicalization?
  MPM.addPass(ReversePostOrderFunctionAttrsPass());

  // Use in-range annotations on GEP indices to split globals where
  // beneficial.
  MPM.addPass(GlobalSplitPass());

  // Whole-program optimization of virtual calls once the callee list is
  // fixed.
  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));

  // Stop here at -O1.
  if (Level == OptimizationLevel::O1) {
    // LowerTypeTests still has to lower type metadata and the type.test
    // intrinsics (it does nothing if CFI is disabled); ICP is performed
    // earlier than this in the regular LTO pipeline.
    AddTypeTestLowering();
    FinishPipeline();
    return MPM;
  }

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Promote any localized globals to SSA registers.
  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));

  // Linking modules together can produce duplicate global constants; keep a
  // single copy of each.
  MPM.addPass(ConstantMergePass());

  // Remove unused arguments from functions.
  MPM.addPass(DeadArgumentEliminationPass());

  // Reduce the code after globalopt and ipsccp. Both can open up significant
  // simplification opportunities and both can propagate functions through
  // function pointers; when that happens varargs calls etc. often need to be
  // resolved, so let instcombine do it.
  FunctionPassManager PostIPOPeepholeFPM;
  PostIPOPeepholeFPM.addPass(InstCombinePass());
  if (SpeedupLevel > 1)
    PostIPOPeepholeFPM.addPass(AggressiveInstCombinePass());
  invokePeepholeEPCallbacks(PostIPOPeepholeFPM, Level);

  MPM.addPass(createModuleToFunctionPassAdaptor(
      std::move(PostIPOPeepholeFPM), PTO.EagerlyInvalidateAnalyses));

  // Note: historically the PruneEH pass ran first to deduce nounwind and
  // generally clean up exception handling overhead. It isn't clear this is
  // valuable, as the inliner doesn't currently care whether it is inlining an
  // invoke or a call.
  // Run the inliner now.
  if (EnableModuleInliner) {
    MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
                                  UseInlineAdvisor, PostLink));
  } else {
    MPM.addPass(ModuleInlinerWrapperPass(
        getInlineParamsFromOptLevel(Level),
        /* MandatoryFirst */ true,
        InlineContext{PostLink, InlinePass::CGSCCInliner}));
  }

  // Context disambiguation runs after inlining, since inlining reduces the
  // amount of additional cloning needed to distinguish allocation contexts.
  if (EnableMemProfContextDisambiguation)
    MPM.addPass(MemProfContextDisambiguation());

  // Optimize globals again now that the inliner has run.
  MPM.addPass(GlobalOptPass());

  // Run OpenMPOpt again after global optimizations.
  MPM.addPass(OpenMPOptPass(PostLink));

  // Garbage collect dead functions.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  // For functions that were not inlined, check whether arguments can be
  // passed by value instead of by reference.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));

  // The IPO passes may leave cruft around; clean up after them.
  FunctionPassManager PostIPOCleanupFPM;
  PostIPOCleanupFPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(PostIPOCleanupFPM, Level);

  if (EnableConstraintElimination)
    PostIPOCleanupFPM.addPass(ConstraintEliminationPass());

  PostIPOCleanupFPM.addPass(JumpThreadingPass());

  // Post-inline, context-sensitive PGO instrumentation or use. Note that
  // these passes go directly into MPM, i.e. ahead of the function pass
  // manager that is still being populated here.
  if (PGOOpt) {
    switch (PGOOpt->CSAction) {
    case PGOOptions::CSIRInstr:
      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                        PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
                        PGOOpt->FS);
      break;
    case PGOOptions::CSIRUse:
      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                        PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
                        PGOOpt->FS);
      break;
    default:
      break;
    }
  }

  // Break up allocas.
  PostIPOCleanupFPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // LTO provides additional opportunities for tailcall elimination due to
  // link-time inlining and visibility of the nocapture attribute.
  PostIPOCleanupFPM.addPass(TailCallElimPass());

  // Run a few AA driven optimizations here and now to clean up the code.
  MPM.addPass(createModuleToFunctionPassAdaptor(
      std::move(PostIPOCleanupFPM), PTO.EagerlyInvalidateAnalyses));

  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Require GlobalsAA for the module so it can be queried from the main
  // function pipeline below.
  if (EnableGlobalAnalyses) {
    MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
    // Invalidate AAManager so it gets recreated and picks up the newly
    // available GlobalsAA.
    MPM.addPass(
        createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
  }

  FunctionPassManager ScalarLoopFPM;
  ScalarLoopFPM.addPass(createFunctionToLoopPassAdaptor(
      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));

  if (RunNewGVN)
    ScalarLoopFPM.addPass(NewGVNPass());
  else
    ScalarLoopFPM.addPass(GVNPass());

  // Remove dead memcpy()'s.
  ScalarLoopFPM.addPass(MemCpyOptPass());

  // Nuke dead stores.
  ScalarLoopFPM.addPass(DSEPass());
  ScalarLoopFPM.addPass(MoveAutoInitPass());
  ScalarLoopFPM.addPass(MergedLoadStoreMotionPass());

  LoopPassManager UnrollLPM;
  if (EnableLoopFlatten && SpeedupLevel > 1)
    UnrollLPM.addPass(LoopFlattenPass());
  UnrollLPM.addPass(IndVarSimplifyPass());
  UnrollLPM.addPass(LoopDeletionPass());
  // FIXME: Add loop interchange.

  // Unroll small loops and perform peeling.
  UnrollLPM.addPass(LoopFullUnrollPass(SpeedupLevel,
                                       /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                       PTO.ForgetAllSCEVInLoopUnroll));
  // The loop passes in this manager (LoopFullUnrollPass) do not preserve
  // MemorySSA. *All* loop passes must preserve it for it to be usable.
  ScalarLoopFPM.addPass(createFunctionToLoopPassAdaptor(
      std::move(UnrollLPM), /*UseMemorySSA=*/false,
      /*UseBlockFrequencyInfo=*/true));

  ScalarLoopFPM.addPass(LoopDistributePass());

  addVectorPasses(Level, ScalarLoopFPM, /* IsFullLTO */ true);

  // Run the OpenMPOpt CGSCC pass again late. This is added to MPM before the
  // function pass manager above is handed over.
  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass(PostLink)));

  invokePeepholeEPCallbacks(ScalarLoopFPM, Level);
  ScalarLoopFPM.addPass(JumpThreadingPass());
  MPM.addPass(createModuleToFunctionPassAdaptor(
      std::move(ScalarLoopFPM), PTO.EagerlyInvalidateAnalyses));

  // Lower type metadata and the type.test intrinsic. This supports clang's
  // control flow integrity mechanisms (-fsanitize=cfi*) and has to run at
  // link time if CFI is enabled; it does nothing if CFI is disabled. ICP is
  // performed earlier than this in the regular LTO pipeline.
  AddTypeTestLowering();

  // Enable splitting late in the FullLTO post-link pipeline.
  if (EnableHotColdSplit)
    MPM.addPass(HotColdSplittingPass());

  // Late LTO optimization passes.
  FunctionPassManager FinalFPM;

  // LoopSink sinks instructions hoisted by LICM, which serves as a
  // canonicalization pass that enables other optimizations. LoopSink therefore
  // has to be a very late IR pass so it doesn't undo LICM's result too early.
  FinalFPM.addPass(LoopSinkPass());

  // Hoist/decompose div/rem ops. This should run after other sink/hoist
  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
  // flattening of blocks.
  FinalFPM.addPass(DivRemPairsPass());

  // Delete basic blocks that optimization passes may have killed.
  FinalFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                       .convertSwitchRangeToICmp(true)
                                       .hoistCommonInsts(true)
                                       .speculateUnpredictables(true)));
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FinalFPM)));

  // Drop bodies of available externally objects to improve GlobalDCE.
  MPM.addPass(EliminateAvailableExternallyPass());

  // The program is optimized now; discard unreachable functions.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (PTO.CallGraphProfile)
    MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));

  FinishPipeline();
  return MPM;
}
2063
2064
// Builds the minimal module pipeline used at O0.
//
// Only passes required for correctness or explicitly requested (PGO
// instrumentation, always-inlining, coroutine lowering, ...) are scheduled,
// and every extension point is still honored when callbacks are registered
// for it. \p LTOPreLink appends the required LTO pre-link passes.
ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
                                                      bool LTOPreLink) {
  assert(Level == OptimizationLevel::O0 &&
         "buildO0DefaultPipeline should only be used with O0");

  ModulePassManager MPM;

  // Adds the given function passes to the module pipeline, unless the
  // callbacks left the manager empty.
  auto AddFunctionPasses = [&MPM](FunctionPassManager &Passes) {
    if (!Passes.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(Passes)));
  };

  // Same as above for loop passes, which need two adaptor layers.
  auto AddLoopPasses = [&MPM](LoopPassManager &Passes) {
    if (!Passes.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(Passes))));
  };

  if (PGOOpt) {
    // Pseudo probe instrumentation is also done in O0 mode, for consistency
    // between build modes. For example, an LTO build can mix an O0 prelink
    // with an O2 postlink; loading a sample profile in the postlink then
    // requires pseudo probe instrumentation in the prelink.
    if (PGOOpt->PseudoProbeForProfiling)
      MPM.addPass(SampleProfileProbePass(TM));

    const bool IsIRInstr = PGOOpt->Action == PGOOptions::IRInstr;
    if (IsIRInstr || PGOOpt->Action == PGOOptions::IRUse)
      addPGOInstrPassesForO0(MPM, /*RunProfileGen=*/IsIRInstr,
                             /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
                             PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
                             PGOOpt->FS);
  }

  // Instrument function entry and exit before all inlining.
  MPM.addPass(createModuleToFunctionPassAdaptor(
      EntryExitInstrumenterPass(/*PostInlining=*/false)));

  invokePipelineStartEPCallbacks(MPM, Level);

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  invokePipelineEarlySimplificationEPCallbacks(MPM, Level);

  // The minimal pipeline follows the semantics required by LLVM, which is
  // just that always-inlining occurs. Lifetime intrinsics are not generated,
  // to avoid enabling further optimizations during code generation.
  MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (EnableMatrix)
    MPM.addPass(
        createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));

  // Extension points that normally live inside nested pass managers. A
  // manager is only built when callbacks are registered, and only added when
  // the callbacks actually put passes into it.
  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
    CGSCCPassManager LateCGPM;
    invokeCGSCCOptimizerLateEPCallbacks(LateCGPM, Level);
    if (!LateCGPM.isEmpty())
      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(LateCGPM)));
  }
  if (!LateLoopOptimizationsEPCallbacks.empty()) {
    LoopPassManager LateLoopPM;
    invokeLateLoopOptimizationsEPCallbacks(LateLoopPM, Level);
    AddLoopPasses(LateLoopPM);
  }
  if (!LoopOptimizerEndEPCallbacks.empty()) {
    LoopPassManager LoopEndPM;
    invokeLoopOptimizerEndEPCallbacks(LoopEndPM, Level);
    AddLoopPasses(LoopEndPM);
  }
  if (!ScalarOptimizerLateEPCallbacks.empty()) {
    FunctionPassManager ScalarLateFPM;
    invokeScalarOptimizerLateEPCallbacks(ScalarLateFPM, Level);
    AddFunctionPasses(ScalarLateFPM);
  }

  invokeOptimizerEarlyEPCallbacks(MPM, Level);

  if (!VectorizerStartEPCallbacks.empty()) {
    FunctionPassManager VectorizerStartFPM;
    invokeVectorizerStartEPCallbacks(VectorizerStartFPM, Level);
    AddFunctionPasses(VectorizerStartFPM);
  }

  // Coroutine passes, grouped into their own module pass manager and handed
  // to CoroConditionalWrapper.
  ModulePassManager CoroPipeline;
  CoroPipeline.addPass(CoroEarlyPass());
  CGSCCPassManager CoroSplitCGPM;
  CoroSplitCGPM.addPass(CoroSplitPass());
  CoroPipeline.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(std::move(CoroSplitCGPM)));
  CoroPipeline.addPass(CoroCleanupPass());
  CoroPipeline.addPass(GlobalDCEPass());
  MPM.addPass(CoroConditionalWrapper(std::move(CoroPipeline)));

  invokeOptimizerLastEPCallbacks(MPM, Level);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(MPM);

  MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));

  return MPM;
}
2167
2168
// Builds the default alias analysis manager.
//
// Registration order matters: it determines the priority of each analysis
// when the manager is queried.
AAManager PassBuilder::buildDefaultAAPipeline() {
  AAManager Manager;

  // Basic alias analysis goes first. It supplies the majority of the
  // per-function local AA logic and is a stateless, on-demand local set of AA
  // techniques.
  Manager.registerFunctionAnalysis<BasicAA>();

  // Next come the fast, specialized analyses that wrap aliasing information
  // embedded in the IR.
  Manager.registerFunctionAnalysis<ScopedNoAliasAA>();
  Manager.registerFunctionAnalysis<TypeBasedAA>();

  // Support querying global aliasing information when it is available.
  // `AAManager` is a function analysis while `GlobalsAA` is a module analysis,
  // so the manager can only query *cached* `GlobalsAA` results through a
  // readonly proxy.
  if (EnableGlobalAnalyses)
    Manager.registerModuleAnalysis<GlobalsAA>();

  // Let the target, if there is one, add its own alias analyses.
  if (TM != nullptr)
    TM->registerDefaultAliasAnalyses(Manager);

  return Manager;
}
2197
2198