Path: blob/main/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
35262 views
//===- Construction of pass pipelines -------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7/// \file8///9/// This file provides the implementation of the PassBuilder based on our10/// static pass registry as well as related functionality. It also provides11/// helpers to aid in analyzing, debugging, and testing passes and pass12/// pipelines.13///14//===----------------------------------------------------------------------===//1516#include "llvm/ADT/Statistic.h"17#include "llvm/Analysis/AliasAnalysis.h"18#include "llvm/Analysis/BasicAliasAnalysis.h"19#include "llvm/Analysis/CGSCCPassManager.h"20#include "llvm/Analysis/GlobalsModRef.h"21#include "llvm/Analysis/InlineAdvisor.h"22#include "llvm/Analysis/ProfileSummaryInfo.h"23#include "llvm/Analysis/ScopedNoAliasAA.h"24#include "llvm/Analysis/TypeBasedAliasAnalysis.h"25#include "llvm/IR/PassManager.h"26#include "llvm/Passes/OptimizationLevel.h"27#include "llvm/Passes/PassBuilder.h"28#include "llvm/Support/CommandLine.h"29#include "llvm/Support/ErrorHandling.h"30#include "llvm/Support/PGOOptions.h"31#include "llvm/Support/VirtualFileSystem.h"32#include "llvm/Target/TargetMachine.h"33#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"34#include "llvm/Transforms/Coroutines/CoroCleanup.h"35#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"36#include "llvm/Transforms/Coroutines/CoroEarly.h"37#include "llvm/Transforms/Coroutines/CoroElide.h"38#include "llvm/Transforms/Coroutines/CoroSplit.h"39#include "llvm/Transforms/HipStdPar/HipStdPar.h"40#include "llvm/Transforms/IPO/AlwaysInliner.h"41#include "llvm/Transforms/IPO/Annotation2Metadata.h"42#include "llvm/Transforms/IPO/ArgumentPromotion.h"43#include 
"llvm/Transforms/IPO/Attributor.h"44#include "llvm/Transforms/IPO/CalledValuePropagation.h"45#include "llvm/Transforms/IPO/ConstantMerge.h"46#include "llvm/Transforms/IPO/CrossDSOCFI.h"47#include "llvm/Transforms/IPO/DeadArgumentElimination.h"48#include "llvm/Transforms/IPO/ElimAvailExtern.h"49#include "llvm/Transforms/IPO/EmbedBitcodePass.h"50#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"51#include "llvm/Transforms/IPO/FunctionAttrs.h"52#include "llvm/Transforms/IPO/GlobalDCE.h"53#include "llvm/Transforms/IPO/GlobalOpt.h"54#include "llvm/Transforms/IPO/GlobalSplit.h"55#include "llvm/Transforms/IPO/HotColdSplitting.h"56#include "llvm/Transforms/IPO/IROutliner.h"57#include "llvm/Transforms/IPO/InferFunctionAttrs.h"58#include "llvm/Transforms/IPO/Inliner.h"59#include "llvm/Transforms/IPO/LowerTypeTests.h"60#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"61#include "llvm/Transforms/IPO/MergeFunctions.h"62#include "llvm/Transforms/IPO/ModuleInliner.h"63#include "llvm/Transforms/IPO/OpenMPOpt.h"64#include "llvm/Transforms/IPO/PartialInlining.h"65#include "llvm/Transforms/IPO/SCCP.h"66#include "llvm/Transforms/IPO/SampleProfile.h"67#include "llvm/Transforms/IPO/SampleProfileProbe.h"68#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"69#include "llvm/Transforms/IPO/WholeProgramDevirt.h"70#include "llvm/Transforms/InstCombine/InstCombine.h"71#include "llvm/Transforms/Instrumentation/CGProfile.h"72#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"73#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"74#include "llvm/Transforms/Instrumentation/InstrProfiling.h"75#include "llvm/Transforms/Instrumentation/MemProfiler.h"76#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"77#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"78#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"79#include "llvm/Transforms/Scalar/ADCE.h"80#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"81#include 
"llvm/Transforms/Scalar/AnnotationRemarks.h"82#include "llvm/Transforms/Scalar/BDCE.h"83#include "llvm/Transforms/Scalar/CallSiteSplitting.h"84#include "llvm/Transforms/Scalar/ConstraintElimination.h"85#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"86#include "llvm/Transforms/Scalar/DFAJumpThreading.h"87#include "llvm/Transforms/Scalar/DeadStoreElimination.h"88#include "llvm/Transforms/Scalar/DivRemPairs.h"89#include "llvm/Transforms/Scalar/EarlyCSE.h"90#include "llvm/Transforms/Scalar/Float2Int.h"91#include "llvm/Transforms/Scalar/GVN.h"92#include "llvm/Transforms/Scalar/IndVarSimplify.h"93#include "llvm/Transforms/Scalar/InferAlignment.h"94#include "llvm/Transforms/Scalar/InstSimplifyPass.h"95#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"96#include "llvm/Transforms/Scalar/JumpThreading.h"97#include "llvm/Transforms/Scalar/LICM.h"98#include "llvm/Transforms/Scalar/LoopDeletion.h"99#include "llvm/Transforms/Scalar/LoopDistribute.h"100#include "llvm/Transforms/Scalar/LoopFlatten.h"101#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"102#include "llvm/Transforms/Scalar/LoopInstSimplify.h"103#include "llvm/Transforms/Scalar/LoopInterchange.h"104#include "llvm/Transforms/Scalar/LoopLoadElimination.h"105#include "llvm/Transforms/Scalar/LoopPassManager.h"106#include "llvm/Transforms/Scalar/LoopRotation.h"107#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"108#include "llvm/Transforms/Scalar/LoopSink.h"109#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"110#include "llvm/Transforms/Scalar/LoopUnrollPass.h"111#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"112#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"113#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"114#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"115#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"116#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"117#include "llvm/Transforms/Scalar/NewGVN.h"118#include 
"llvm/Transforms/Scalar/Reassociate.h"119#include "llvm/Transforms/Scalar/SCCP.h"120#include "llvm/Transforms/Scalar/SROA.h"121#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"122#include "llvm/Transforms/Scalar/SimplifyCFG.h"123#include "llvm/Transforms/Scalar/SpeculativeExecution.h"124#include "llvm/Transforms/Scalar/TailRecursionElimination.h"125#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"126#include "llvm/Transforms/Utils/AddDiscriminators.h"127#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"128#include "llvm/Transforms/Utils/CanonicalizeAliases.h"129#include "llvm/Transforms/Utils/CountVisits.h"130#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"131#include "llvm/Transforms/Utils/InjectTLIMappings.h"132#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"133#include "llvm/Transforms/Utils/Mem2Reg.h"134#include "llvm/Transforms/Utils/MoveAutoInit.h"135#include "llvm/Transforms/Utils/NameAnonGlobals.h"136#include "llvm/Transforms/Utils/RelLookupTableConverter.h"137#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"138#include "llvm/Transforms/Vectorize/LoopVectorize.h"139#include "llvm/Transforms/Vectorize/SLPVectorizer.h"140#include "llvm/Transforms/Vectorize/VectorCombine.h"141142using namespace llvm;143144static cl::opt<InliningAdvisorMode> UseInlineAdvisor(145"enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,146cl::desc("Enable ML policy for inliner. 
Currently trained for -Oz only"),147cl::values(clEnumValN(InliningAdvisorMode::Default, "default",148"Heuristics-based inliner version"),149clEnumValN(InliningAdvisorMode::Development, "development",150"Use development mode (runtime-loadable model)"),151clEnumValN(InliningAdvisorMode::Release, "release",152"Use release mode (AOT-compiled model)")));153154static cl::opt<bool> EnableSyntheticCounts(155"enable-npm-synthetic-counts", cl::Hidden,156cl::desc("Run synthetic function entry count generation "157"pass"));158159/// Flag to enable inline deferral during PGO.160static cl::opt<bool>161EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),162cl::Hidden,163cl::desc("Enable inline deferral during PGO"));164165static cl::opt<bool> EnableModuleInliner("enable-module-inliner",166cl::init(false), cl::Hidden,167cl::desc("Enable module inliner"));168169static cl::opt<bool> PerformMandatoryInliningsFirst(170"mandatory-inlining-first", cl::init(false), cl::Hidden,171cl::desc("Perform mandatory inlinings module-wide, before performing "172"inlining"));173174static cl::opt<bool> EnableEagerlyInvalidateAnalyses(175"eagerly-invalidate-analyses", cl::init(true), cl::Hidden,176cl::desc("Eagerly invalidate more analyses in default pipelines"));177178static cl::opt<bool> EnableMergeFunctions(179"enable-merge-functions", cl::init(false), cl::Hidden,180cl::desc("Enable function merging as part of the optimization pipeline"));181182static cl::opt<bool> EnablePostPGOLoopRotation(183"enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,184cl::desc("Run the loop rotation transformation after PGO instrumentation"));185186static cl::opt<bool> EnableGlobalAnalyses(187"enable-global-analyses", cl::init(true), cl::Hidden,188cl::desc("Enable inter-procedural analyses"));189190static cl::opt<bool>191RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,192cl::desc("Run Partial inlinining pass"));193194static cl::opt<bool> 
ExtraVectorizerPasses(195"extra-vectorizer-passes", cl::init(false), cl::Hidden,196cl::desc("Run cleanup optimization passes after vectorization"));197198static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,199cl::desc("Run the NewGVN pass"));200201static cl::opt<bool> EnableLoopInterchange(202"enable-loopinterchange", cl::init(false), cl::Hidden,203cl::desc("Enable the experimental LoopInterchange Pass"));204205static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",206cl::init(false), cl::Hidden,207cl::desc("Enable Unroll And Jam Pass"));208209static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),210cl::Hidden,211cl::desc("Enable the LoopFlatten Pass"));212213// Experimentally allow loop header duplication. This should allow for better214// optimization at Oz, since loop-idiom recognition can then recognize things215// like memcpy. If this ends up being useful for many targets, we should drop216// this flag and make a code generation option that can be controlled217// independent of the opt level and exposed through the frontend.218static cl::opt<bool> EnableLoopHeaderDuplication(219"enable-loop-header-duplication", cl::init(false), cl::Hidden,220cl::desc("Enable loop header duplication at any optimization level"));221222static cl::opt<bool>223EnableDFAJumpThreading("enable-dfa-jump-thread",224cl::desc("Enable DFA jump threading"),225cl::init(false), cl::Hidden);226227// TODO: turn on and remove flag228static cl::opt<bool> EnablePGOForceFunctionAttrs(229"enable-pgo-force-function-attrs",230cl::desc("Enable pass to set function attributes based on PGO profiles"),231cl::init(false));232233static cl::opt<bool>234EnableHotColdSplit("hot-cold-split",235cl::desc("Enable hot-cold splitting pass"));236237static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),238cl::Hidden,239cl::desc("Enable ir outliner pass"));240241static cl::opt<bool>242DisablePreInliner("disable-preinline", cl::init(false), 
cl::Hidden,243cl::desc("Disable pre-instrumentation inliner"));244245static cl::opt<int> PreInlineThreshold(246"preinline-threshold", cl::Hidden, cl::init(75),247cl::desc("Control the amount of inlining in pre-instrumentation inliner "248"(default = 75)"));249250static cl::opt<bool>251EnableGVNHoist("enable-gvn-hoist",252cl::desc("Enable the GVN hoisting pass (default = off)"));253254static cl::opt<bool>255EnableGVNSink("enable-gvn-sink",256cl::desc("Enable the GVN sinking pass (default = off)"));257258static cl::opt<bool> EnableJumpTableToSwitch(259"enable-jump-table-to-switch",260cl::desc("Enable JumpTableToSwitch pass (default = off)"));261262// This option is used in simplifying testing SampleFDO optimizations for263// profile loading.264static cl::opt<bool>265EnableCHR("enable-chr", cl::init(true), cl::Hidden,266cl::desc("Enable control height reduction optimization (CHR)"));267268static cl::opt<bool> FlattenedProfileUsed(269"flattened-profile-used", cl::init(false), cl::Hidden,270cl::desc("Indicate the sample profile being used is flattened, i.e., "271"no inline hierachy exists in the profile"));272273static cl::opt<bool> EnableOrderFileInstrumentation(274"enable-order-file-instrumentation", cl::init(false), cl::Hidden,275cl::desc("Enable order file instrumentation (default = off)"));276277static cl::opt<bool>278EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,279cl::desc("Enable lowering of the matrix intrinsics"));280281static cl::opt<bool> EnableConstraintElimination(282"enable-constraint-elimination", cl::init(true), cl::Hidden,283cl::desc(284"Enable pass to eliminate conditions based on linear constraints"));285286static cl::opt<AttributorRunOption> AttributorRun(287"attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),288cl::desc("Enable the attributor inter-procedural deduction pass"),289cl::values(clEnumValN(AttributorRunOption::ALL, "all",290"enable all attributor runs"),291clEnumValN(AttributorRunOption::MODULE, 
"module",292"enable module-wide attributor runs"),293clEnumValN(AttributorRunOption::CGSCC, "cgscc",294"enable call graph SCC attributor runs"),295clEnumValN(AttributorRunOption::NONE, "none",296"disable attributor runs")));297298static cl::opt<bool> EnableSampledInstr(299"enable-sampled-instrumentation", cl::init(false), cl::Hidden,300cl::desc("Enable profile instrumentation sampling (default = off)"));301static cl::opt<bool> UseLoopVersioningLICM(302"enable-loop-versioning-licm", cl::init(false), cl::Hidden,303cl::desc("Enable the experimental Loop Versioning LICM pass"));304305namespace llvm {306extern cl::opt<bool> EnableMemProfContextDisambiguation;307308extern cl::opt<bool> EnableInferAlignmentPass;309} // namespace llvm310311PipelineTuningOptions::PipelineTuningOptions() {312LoopInterleaving = true;313LoopVectorization = true;314SLPVectorization = false;315LoopUnrolling = true;316ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;317LicmMssaOptCap = SetLicmMssaOptCap;318LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;319CallGraphProfile = true;320UnifiedLTO = false;321MergeFunctions = EnableMergeFunctions;322InlinerThreshold = -1;323EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;324}325326namespace llvm {327extern cl::opt<unsigned> MaxDevirtIterations;328} // namespace llvm329330void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,331OptimizationLevel Level) {332for (auto &C : PeepholeEPCallbacks)333C(FPM, Level);334}335void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(336LoopPassManager &LPM, OptimizationLevel Level) {337for (auto &C : LateLoopOptimizationsEPCallbacks)338C(LPM, Level);339}340void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,341OptimizationLevel Level) {342for (auto &C : LoopOptimizerEndEPCallbacks)343C(LPM, Level);344}345void PassBuilder::invokeScalarOptimizerLateEPCallbacks(346FunctionPassManager &FPM, OptimizationLevel Level) {347for (auto &C : 
ScalarOptimizerLateEPCallbacks)348C(FPM, Level);349}350void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,351OptimizationLevel Level) {352for (auto &C : CGSCCOptimizerLateEPCallbacks)353C(CGPM, Level);354}355void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,356OptimizationLevel Level) {357for (auto &C : VectorizerStartEPCallbacks)358C(FPM, Level);359}360void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,361OptimizationLevel Level) {362for (auto &C : OptimizerEarlyEPCallbacks)363C(MPM, Level);364}365void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,366OptimizationLevel Level) {367for (auto &C : OptimizerLastEPCallbacks)368C(MPM, Level);369}370void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(371ModulePassManager &MPM, OptimizationLevel Level) {372for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)373C(MPM, Level);374}375void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(376ModulePassManager &MPM, OptimizationLevel Level) {377for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)378C(MPM, Level);379}380void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,381OptimizationLevel Level) {382for (auto &C : PipelineStartEPCallbacks)383C(MPM, Level);384}385void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(386ModulePassManager &MPM, OptimizationLevel Level) {387for (auto &C : PipelineEarlySimplificationEPCallbacks)388C(MPM, Level);389}390391// Helper to add AnnotationRemarksPass.392static void addAnnotationRemarksPass(ModulePassManager &MPM) {393MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));394}395396// Helper to check if the current compilation phase is preparing for LTO397static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {398return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||399Phase == ThinOrFullLTOPhase::FullLTOPreLink;400}401402// TODO: Investigate the cost/benefit of tail call 
elimination on debugging.403FunctionPassManager404PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,405ThinOrFullLTOPhase Phase) {406407FunctionPassManager FPM;408409if (AreStatisticsEnabled())410FPM.addPass(CountVisitsPass());411412// Form SSA out of local memory accesses after breaking apart aggregates into413// scalars.414FPM.addPass(SROAPass(SROAOptions::ModifyCFG));415416// Catch trivial redundancies417FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));418419// Hoisting of scalars and load expressions.420FPM.addPass(421SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));422FPM.addPass(InstCombinePass());423424FPM.addPass(LibCallsShrinkWrapPass());425426invokePeepholeEPCallbacks(FPM, Level);427428FPM.addPass(429SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));430431// Form canonically associated expression trees, and simplify the trees using432// basic mathematical properties. For example, this will form (nearly)433// minimal multiplication trees.434FPM.addPass(ReassociatePass());435436// Add the primary loop simplification pipeline.437// FIXME: Currently this is split into two loop pass pipelines because we run438// some function passes in between them. These can and should be removed439// and/or replaced by scheduling the loop pass equivalents in the correct440// positions. But those equivalent passes aren't powerful enough yet.441// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still442// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to443// fully replace `SimplifyCFGPass`, and the closest to the other we have is444// `LoopInstSimplify`.445LoopPassManager LPM1, LPM2;446447// Simplify the loop body. 
We do this initially to clean up after other loop448// passes run, either when iterating on a loop or on inner loops with449// implications on the outer loop.450LPM1.addPass(LoopInstSimplifyPass());451LPM1.addPass(LoopSimplifyCFGPass());452453// Try to remove as much code from the loop header as possible,454// to reduce amount of IR that will have to be duplicated. However,455// do not perform speculative hoisting the first time as LICM456// will destroy metadata that may not need to be destroyed if run457// after loop rotation.458// TODO: Investigate promotion cap for O1.459LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,460/*AllowSpeculation=*/false));461462LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,463isLTOPreLink(Phase)));464// TODO: Investigate promotion cap for O1.465LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,466/*AllowSpeculation=*/true));467LPM1.addPass(SimpleLoopUnswitchPass());468if (EnableLoopFlatten)469LPM1.addPass(LoopFlattenPass());470471LPM2.addPass(LoopIdiomRecognizePass());472LPM2.addPass(IndVarSimplifyPass());473474invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);475476LPM2.addPass(LoopDeletionPass());477478if (EnableLoopInterchange)479LPM2.addPass(LoopInterchangePass());480481// Do not enable unrolling in PreLinkThinLTO phase during sample PGO482// because it changes IR to makes profile annotation in back compile483// inaccurate. 
The normal unroller doesn't pay attention to forced full unroll484// attributes so we need to make sure and allow the full unroll pass to pay485// attention to it.486if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||487PGOOpt->Action != PGOOptions::SampleUse)488LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),489/* OnlyWhenForced= */ !PTO.LoopUnrolling,490PTO.ForgetAllSCEVInLoopUnroll));491492invokeLoopOptimizerEndEPCallbacks(LPM2, Level);493494FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),495/*UseMemorySSA=*/true,496/*UseBlockFrequencyInfo=*/true));497FPM.addPass(498SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));499FPM.addPass(InstCombinePass());500// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.501// *All* loop passes must preserve it, in order to be able to use it.502FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),503/*UseMemorySSA=*/false,504/*UseBlockFrequencyInfo=*/false));505506// Delete small array after loop unroll.507FPM.addPass(SROAPass(SROAOptions::ModifyCFG));508509// Specially optimize memory movement as it doesn't look like dataflow in SSA.510FPM.addPass(MemCpyOptPass());511512// Sparse conditional constant propagation.513// FIXME: It isn't clear why we do this *after* loop passes rather than514// before...515FPM.addPass(SCCPPass());516517// Delete dead bit computations (instcombine runs after to fold away the dead518// computations, and then ADCE will run later to exploit any new DCE519// opportunities that creates).520FPM.addPass(BDCEPass());521522// Run instcombine after redundancy and dead bit elimination to exploit523// opportunities opened up by them.524FPM.addPass(InstCombinePass());525invokePeepholeEPCallbacks(FPM, Level);526527FPM.addPass(CoroElidePass());528529invokeScalarOptimizerLateEPCallbacks(FPM, Level);530531// Finally, do an expensive DCE pass to catch all the dead code exposed by532// the simplifications and basic cleanup after all the 
simplifications.533// TODO: Investigate if this is too expensive.534FPM.addPass(ADCEPass());535FPM.addPass(536SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));537FPM.addPass(InstCombinePass());538invokePeepholeEPCallbacks(FPM, Level);539540return FPM;541}542543FunctionPassManager544PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,545ThinOrFullLTOPhase Phase) {546assert(Level != OptimizationLevel::O0 && "Must request optimizations!");547548// The O1 pipeline has a separate pipeline creation function to simplify549// construction readability.550if (Level.getSpeedupLevel() == 1)551return buildO1FunctionSimplificationPipeline(Level, Phase);552553FunctionPassManager FPM;554555if (AreStatisticsEnabled())556FPM.addPass(CountVisitsPass());557558// Form SSA out of local memory accesses after breaking apart aggregates into559// scalars.560FPM.addPass(SROAPass(SROAOptions::ModifyCFG));561562// Catch trivial redundancies563FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. 
*/));564if (EnableKnowledgeRetention)565FPM.addPass(AssumeSimplifyPass());566567// Hoisting of scalars and load expressions.568if (EnableGVNHoist)569FPM.addPass(GVNHoistPass());570571// Global value numbering based sinking.572if (EnableGVNSink) {573FPM.addPass(GVNSinkPass());574FPM.addPass(575SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));576}577578// Speculative execution if the target has divergent branches; otherwise nop.579FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));580581// Optimize based on known information about branches, and cleanup afterward.582FPM.addPass(JumpThreadingPass());583FPM.addPass(CorrelatedValuePropagationPass());584585// Jump table to switch conversion.586if (EnableJumpTableToSwitch)587FPM.addPass(JumpTableToSwitchPass());588589FPM.addPass(590SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));591FPM.addPass(InstCombinePass());592FPM.addPass(AggressiveInstCombinePass());593594if (!Level.isOptimizingForSize())595FPM.addPass(LibCallsShrinkWrapPass());596597invokePeepholeEPCallbacks(FPM, Level);598599// For PGO use pipeline, try to optimize memory intrinsics such as memcpy600// using the size value profile. Don't perform this when optimizing for size.601if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&602!Level.isOptimizingForSize())603FPM.addPass(PGOMemOPSizeOpt());604605FPM.addPass(TailCallElimPass());606FPM.addPass(607SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));608609// Form canonically associated expression trees, and simplify the trees using610// basic mathematical properties. For example, this will form (nearly)611// minimal multiplication trees.612FPM.addPass(ReassociatePass());613614if (EnableConstraintElimination)615FPM.addPass(ConstraintEliminationPass());616617// Add the primary loop simplification pipeline.618// FIXME: Currently this is split into two loop pass pipelines because we run619// some function passes in between them. 
These can and should be removed620// and/or replaced by scheduling the loop pass equivalents in the correct621// positions. But those equivalent passes aren't powerful enough yet.622// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still623// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to624// fully replace `SimplifyCFGPass`, and the closest to the other we have is625// `LoopInstSimplify`.626LoopPassManager LPM1, LPM2;627628// Simplify the loop body. We do this initially to clean up after other loop629// passes run, either when iterating on a loop or on inner loops with630// implications on the outer loop.631LPM1.addPass(LoopInstSimplifyPass());632LPM1.addPass(LoopSimplifyCFGPass());633634// Try to remove as much code from the loop header as possible,635// to reduce amount of IR that will have to be duplicated. However,636// do not perform speculative hoisting the first time as LICM637// will destroy metadata that may not need to be destroyed if run638// after loop rotation.639// TODO: Investigate promotion cap for O1.640LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,641/*AllowSpeculation=*/false));642643// Disable header duplication in loop rotation at -Oz.644LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||645Level != OptimizationLevel::Oz,646isLTOPreLink(Phase)));647// TODO: Investigate promotion cap for O1.648LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,649/*AllowSpeculation=*/true));650LPM1.addPass(651SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));652if (EnableLoopFlatten)653LPM1.addPass(LoopFlattenPass());654655LPM2.addPass(LoopIdiomRecognizePass());656LPM2.addPass(IndVarSimplifyPass());657658{659ExtraSimpleLoopUnswitchPassManager ExtraPasses;660ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level 
==661OptimizationLevel::O3));662LPM2.addPass(std::move(ExtraPasses));663}664665invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);666667LPM2.addPass(LoopDeletionPass());668669if (EnableLoopInterchange)670LPM2.addPass(LoopInterchangePass());671672// Do not enable unrolling in PreLinkThinLTO phase during sample PGO673// because it changes IR to makes profile annotation in back compile674// inaccurate. The normal unroller doesn't pay attention to forced full unroll675// attributes so we need to make sure and allow the full unroll pass to pay676// attention to it.677if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||678PGOOpt->Action != PGOOptions::SampleUse)679LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),680/* OnlyWhenForced= */ !PTO.LoopUnrolling,681PTO.ForgetAllSCEVInLoopUnroll));682683invokeLoopOptimizerEndEPCallbacks(LPM2, Level);684685FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),686/*UseMemorySSA=*/true,687/*UseBlockFrequencyInfo=*/true));688FPM.addPass(689SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));690FPM.addPass(InstCombinePass());691// The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,692// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.693// *All* loop passes must preserve it, in order to be able to use it.694FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),695/*UseMemorySSA=*/false,696/*UseBlockFrequencyInfo=*/false));697698// Delete small array after loop unroll.699FPM.addPass(SROAPass(SROAOptions::ModifyCFG));700701// Try vectorization/scalarization transforms that are both improvements702// themselves and can allow further folds with GVN and InstCombine.703FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));704705// Eliminate redundancies.706FPM.addPass(MergedLoadStoreMotionPass());707if (RunNewGVN)708FPM.addPass(NewGVNPass());709else710FPM.addPass(GVNPass());711712// Sparse conditional constant propagation.713// FIXME: It isn't clear why 
we do this *after* loop passes rather than714// before...715FPM.addPass(SCCPPass());716717// Delete dead bit computations (instcombine runs after to fold away the dead718// computations, and then ADCE will run later to exploit any new DCE719// opportunities that creates).720FPM.addPass(BDCEPass());721722// Run instcombine after redundancy and dead bit elimination to exploit723// opportunities opened up by them.724FPM.addPass(InstCombinePass());725invokePeepholeEPCallbacks(FPM, Level);726727// Re-consider control flow based optimizations after redundancy elimination,728// redo DCE, etc.729if (EnableDFAJumpThreading)730FPM.addPass(DFAJumpThreadingPass());731732FPM.addPass(JumpThreadingPass());733FPM.addPass(CorrelatedValuePropagationPass());734735// Finally, do an expensive DCE pass to catch all the dead code exposed by736// the simplifications and basic cleanup after all the simplifications.737// TODO: Investigate if this is too expensive.738FPM.addPass(ADCEPass());739740// Specially optimize memory movement as it doesn't look like dataflow in SSA.741FPM.addPass(MemCpyOptPass());742743FPM.addPass(DSEPass());744FPM.addPass(MoveAutoInitPass());745746FPM.addPass(createFunctionToLoopPassAdaptor(747LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,748/*AllowSpeculation=*/true),749/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));750751FPM.addPass(CoroElidePass());752753invokeScalarOptimizerLateEPCallbacks(FPM, Level);754755FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()756.convertSwitchRangeToICmp(true)757.hoistCommonInsts(true)758.sinkCommonInsts(true)));759FPM.addPass(InstCombinePass());760invokePeepholeEPCallbacks(FPM, Level);761762return FPM;763}764765void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {766MPM.addPass(CanonicalizeAliasesPass());767MPM.addPass(NameAnonGlobalPass());768}769770void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,771OptimizationLevel Level,772ThinOrFullLTOPhase LTOPhase) {773assert(Level != 
OptimizationLevel::O0 && "Not expecting O0 here!");774if (DisablePreInliner)775return;776InlineParams IP;777778IP.DefaultThreshold = PreInlineThreshold;779780// FIXME: The hint threshold has the same value used by the regular inliner781// when not optimzing for size. This should probably be lowered after782// performance testing.783// FIXME: this comment is cargo culted from the old pass manager, revisit).784IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;785ModuleInlinerWrapperPass MIWP(786IP, /* MandatoryFirst */ true,787InlineContext{LTOPhase, InlinePass::EarlyInliner});788CGSCCPassManager &CGPipeline = MIWP.getPM();789790FunctionPassManager FPM;791FPM.addPass(SROAPass(SROAOptions::ModifyCFG));792FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.793FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(794true))); // Merge & remove basic blocks.795FPM.addPass(InstCombinePass()); // Combine silly sequences.796invokePeepholeEPCallbacks(FPM, Level);797798CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(799std::move(FPM), PTO.EagerlyInvalidateAnalyses));800801MPM.addPass(std::move(MIWP));802803// Delete anything that is now dead to make sure that we don't instrument804// dead code. 
Instrumentation can end up keeping dead code around and805// dramatically increase code size.806MPM.addPass(GlobalDCEPass());807}808809void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,810OptimizationLevel Level) {811if (EnablePostPGOLoopRotation) {812// Disable header duplication in loop rotation at -Oz.813MPM.addPass(createModuleToFunctionPassAdaptor(814createFunctionToLoopPassAdaptor(815LoopRotatePass(EnableLoopHeaderDuplication ||816Level != OptimizationLevel::Oz),817/*UseMemorySSA=*/false,818/*UseBlockFrequencyInfo=*/false),819PTO.EagerlyInvalidateAnalyses));820}821}822823void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,824OptimizationLevel Level, bool RunProfileGen,825bool IsCS, bool AtomicCounterUpdate,826std::string ProfileFile,827std::string ProfileRemappingFile,828IntrusiveRefCntPtr<vfs::FileSystem> FS) {829assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");830831if (!RunProfileGen) {832assert(!ProfileFile.empty() && "Profile use expecting a profile file!");833MPM.addPass(834PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));835// Cache ProfileSummaryAnalysis once to avoid the potential need to insert836// RequireAnalysisPass for PSI before subsequent non-module passes.837MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());838return;839}840841// Perform PGO instrumentation.842MPM.addPass(PGOInstrumentationGen(IsCS));843844addPostPGOLoopRotation(MPM, Level);845// Add the profile lowering pass.846InstrProfOptions Options;847if (!ProfileFile.empty())848Options.InstrProfileOutput = ProfileFile;849// Do counter promotion at Level greater than O0.850Options.DoCounterPromotion = true;851Options.UseBFIInPromotion = IsCS;852if (EnableSampledInstr) {853Options.Sampling = true;854// With sampling, there is little benefit to enabling counter promotion.855// But note that sampling does work with counter promotion.856Options.DoCounterPromotion = false;857}858Options.Atomic = 
AtomicCounterUpdate;859MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));860}861862void PassBuilder::addPGOInstrPassesForO0(863ModulePassManager &MPM, bool RunProfileGen, bool IsCS,864bool AtomicCounterUpdate, std::string ProfileFile,865std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {866if (!RunProfileGen) {867assert(!ProfileFile.empty() && "Profile use expecting a profile file!");868MPM.addPass(869PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));870// Cache ProfileSummaryAnalysis once to avoid the potential need to insert871// RequireAnalysisPass for PSI before subsequent non-module passes.872MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());873return;874}875876// Perform PGO instrumentation.877MPM.addPass(PGOInstrumentationGen(IsCS));878// Add the profile lowering pass.879InstrProfOptions Options;880if (!ProfileFile.empty())881Options.InstrProfileOutput = ProfileFile;882// Do not do counter promotion at O0.883Options.DoCounterPromotion = false;884Options.UseBFIInPromotion = IsCS;885Options.Atomic = AtomicCounterUpdate;886MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));887}888889static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {890return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());891}892893ModuleInlinerWrapperPass894PassBuilder::buildInlinerPipeline(OptimizationLevel Level,895ThinOrFullLTOPhase Phase) {896InlineParams IP;897if (PTO.InlinerThreshold == -1)898IP = getInlineParamsFromOptLevel(Level);899else900IP = getInlineParams(PTO.InlinerThreshold);901// For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to902// disable hot callsite inline (as much as possible [1]) because it makes903// profile annotation in the backend inaccurate.904//905// [1] Note the cost of a function could be below zero due to erased906// prologue / epilogue.907if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&908PGOOpt->Action == 
PGOOptions::SampleUse)909IP.HotCallSiteThreshold = 0;910911if (PGOOpt)912IP.EnableDeferral = EnablePGOInlineDeferral;913914ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,915InlineContext{Phase, InlinePass::CGSCCInliner},916UseInlineAdvisor, MaxDevirtIterations);917918// Require the GlobalsAA analysis for the module so we can query it within919// the CGSCC pipeline.920if (EnableGlobalAnalyses) {921MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());922// Invalidate AAManager so it can be recreated and pick up the newly923// available GlobalsAA.924MIWP.addModulePass(925createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));926}927928// Require the ProfileSummaryAnalysis for the module so we can query it within929// the inliner pass.930MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());931932// Now begin the main postorder CGSCC pipeline.933// FIXME: The current CGSCC pipeline has its origins in the legacy pass934// manager and trying to emulate its precise behavior. Much of this doesn't935// make a lot of sense and we should revisit the core CGSCC structure.936CGSCCPassManager &MainCGPipeline = MIWP.getPM();937938// Note: historically, the PruneEH pass was run first to deduce nounwind and939// generally clean up exception handling overhead. It isn't clear this is940// valuable as the inliner doesn't currently care whether it is inlining an941// invoke or a call.942943if (AttributorRun & AttributorRunOption::CGSCC)944MainCGPipeline.addPass(AttributorCGSCCPass());945946// Deduce function attributes. 
We do another run of this after the function947// simplification pipeline, so this only needs to run when it could affect the948// function simplification pipeline, which is only the case with recursive949// functions.950MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));951952// When at O3 add argument promotion to the pass pipeline.953// FIXME: It isn't at all clear why this should be limited to O3.954if (Level == OptimizationLevel::O3)955MainCGPipeline.addPass(ArgumentPromotionPass());956957// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if958// there are no OpenMP runtime calls present in the module.959if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)960MainCGPipeline.addPass(OpenMPOptCGSCCPass());961962invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);963964// Add the core function simplification pipeline nested inside the965// CGSCC walk.966MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(967buildFunctionSimplificationPipeline(Level, Phase),968PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));969970// Finally, deduce any function attributes based on the fully simplified971// function.972MainCGPipeline.addPass(PostOrderFunctionAttrsPass());973974// Mark that the function is fully simplified and that it shouldn't be975// simplified again if we somehow revisit it due to CGSCC mutations unless976// it's been modified since.977MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(978RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));979980MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));981982// Make sure we don't affect potential future NoRerun CGSCC adaptors.983MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(984InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));985986return MIWP;987}988989ModulePassManager990PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,991ThinOrFullLTOPhase Phase) 
{992ModulePassManager MPM;993994InlineParams IP = getInlineParamsFromOptLevel(Level);995// For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to996// disable hot callsite inline (as much as possible [1]) because it makes997// profile annotation in the backend inaccurate.998//999// [1] Note the cost of a function could be below zero due to erased1000// prologue / epilogue.1001if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&1002PGOOpt->Action == PGOOptions::SampleUse)1003IP.HotCallSiteThreshold = 0;10041005if (PGOOpt)1006IP.EnableDeferral = EnablePGOInlineDeferral;10071008// The inline deferral logic is used to avoid losing some1009// inlining opportunities in the future. It is helpful in the SCC inliner, in which1010// inlining is processed in bottom-up order.1011// In the module inliner, by contrast, the inlining order is a priority-based order1012// by default, so inline deferral is unnecessary there. We therefore disable the1013// inline deferral logic in the module inliner; this unconditionally overrides the PGO-based value assigned just above.1014IP.EnableDeferral = false;10151016MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));10171018MPM.addPass(createModuleToFunctionPassAdaptor(1019buildFunctionSimplificationPipeline(Level, Phase),1020PTO.EagerlyInvalidateAnalyses));10211022MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(1023CoroSplitPass(Level != OptimizationLevel::O0)));10241025return MPM;1026}10271028ModulePassManager1029PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,1030ThinOrFullLTOPhase Phase) {1031assert(Level != OptimizationLevel::O0 &&1032"Should not be used for O0 pipeline");10331034assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&1035"FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");10361037ModulePassManager MPM;10381039// Place pseudo probe instrumentation as the first pass of the pipeline to1040// minimize the impact of optimization changes.1041if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&1042Phase != 
ThinOrFullLTOPhase::ThinLTOPostLink)1043MPM.addPass(SampleProfileProbePass(TM));10441045bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);10461047// In ThinLTO mode, when flattened profile is used, all the available1048// profile information will be annotated in PreLink phase so there is1049// no need to load the profile again in PostLink.1050bool LoadSampleProfile =1051HasSampleProfile &&1052!(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);10531054// During the ThinLTO backend phase we perform early indirect call promotion1055// here, before globalopt. Otherwise imported available_externally functions1056// look unreferenced and are removed. If we are going to load the sample1057// profile then defer until later.1058// TODO: See if we can move later and consolidate with the location where1059// we perform ICP when we are loading a sample profile.1060// TODO: We pass HasSampleProfile (whether there was a sample profile file1061// passed to the compile) to the SamplePGO flag of ICP. This is used to1062// determine whether the new direct calls are annotated with prof metadata.1063// Ideally this should be determined from whether the IR is annotated with1064// sample profile, and not whether a sample profile was provided on the1065// command line. E.g. for flattened profiles where we will not be reloading1066// the sample profile in the ThinLTO backend, we ideally shouldn't have to1067// provide the sample profile file.1068if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)1069MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));10701071// Create an early function pass manager to cleanup the output of the1072// frontend. 
Not necessary with LTO post link pipelines since the pre link1073// pipeline already cleaned up the frontend output.1074if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {1075// Do basic inference of function attributes from known properties of system1076// libraries and other oracles.1077MPM.addPass(InferFunctionAttrsPass());1078MPM.addPass(CoroEarlyPass());10791080FunctionPassManager EarlyFPM;1081EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));1082// Lower llvm.expect to metadata before attempting transforms.1083// Compare/branch metadata may alter the behavior of passes like1084// SimplifyCFG.1085EarlyFPM.addPass(LowerExpectIntrinsicPass());1086EarlyFPM.addPass(SimplifyCFGPass());1087EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));1088EarlyFPM.addPass(EarlyCSEPass());1089if (Level == OptimizationLevel::O3)1090EarlyFPM.addPass(CallSiteSplittingPass());1091MPM.addPass(createModuleToFunctionPassAdaptor(1092std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));1093}10941095if (LoadSampleProfile) {1096// Annotate sample profile right after early FPM to ensure freshness of1097// the debug info.1098MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,1099PGOOpt->ProfileRemappingFile, Phase));1100// Cache ProfileSummaryAnalysis once to avoid the potential need to insert1101// RequireAnalysisPass for PSI before subsequent non-module passes.1102MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());1103// Do not invoke ICP in the LTOPrelink phase as it makes it hard1104// for the profile annotation to be accurate in the LTO backend.1105if (!isLTOPreLink(Phase))1106// We perform early indirect call promotion here, before globalopt.1107// This is important for the ThinLTO backend phase because otherwise1108// imported available_externally functions look unreferenced and are1109// removed.1110MPM.addPass(1111PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));1112}11131114// Try to perform OpenMP specific optimizations on 
the module. This is a1115// (quick!) no-op if there are no OpenMP runtime calls present in the module.1116MPM.addPass(OpenMPOptPass());11171118if (AttributorRun & AttributorRunOption::MODULE)1119MPM.addPass(AttributorPass());11201121// Lower type metadata and the type.test intrinsic in the ThinLTO1122// post link pipeline after ICP. This is to enable usage of the type1123// tests in ICP sequences.1124if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)1125MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));11261127invokePipelineEarlySimplificationEPCallbacks(MPM, Level);11281129// Interprocedural constant propagation now that basic cleanup has occurred1130// and prior to optimizing globals.1131// FIXME: This position in the pipeline hasn't been carefully considered in1132// years, it should be re-analyzed.1133MPM.addPass(IPSCCPPass(1134IPSCCPOptions(/*AllowFuncSpec=*/1135Level != OptimizationLevel::Os &&1136Level != OptimizationLevel::Oz &&1137!isLTOPreLink(Phase))));11381139// Attach metadata to indirect call sites indicating the set of functions1140// they may target at run-time. 
This should follow IPSCCP.1141MPM.addPass(CalledValuePropagationPass());11421143// Optimize globals to try and fold them into constants.1144MPM.addPass(GlobalOptPass());11451146// Create a small function pass pipeline to cleanup after all the global1147// optimizations.1148FunctionPassManager GlobalCleanupPM;1149// FIXME: Should this instead be a run of SROA?1150GlobalCleanupPM.addPass(PromotePass());1151GlobalCleanupPM.addPass(InstCombinePass());1152invokePeepholeEPCallbacks(GlobalCleanupPM, Level);1153GlobalCleanupPM.addPass(1154SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));1155MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),1156PTO.EagerlyInvalidateAnalyses));11571158// We already asserted this happens in non-FullLTOPostLink earlier.1159const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;1160const bool IsPGOPreLink = PGOOpt && IsPreLink;1161const bool IsPGOInstrGen =1162IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;1163const bool IsPGOInstrUse =1164IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;1165const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();1166// We don't want to mix pgo ctx gen and pgo gen; we also don't currently1167// enable ctx profiling from the frontend.1168assert(1169!(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) &&1170"Enabling both instrumented FDO and contextual instrumentation is not "1171"supported.");1172// Enable contextual profiling instrumentation.1173const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&1174PGOCtxProfLoweringPass::isContextualIRPGOEnabled();11751176if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)1177addPreInlinerPasses(MPM, Level, Phase);11781179// Add all the requested passes for instrumentation PGO, if requested.1180if (IsPGOInstrGen || IsPGOInstrUse) {1181addPGOInstrPasses(MPM, Level,1182/*RunProfileGen=*/IsPGOInstrGen,1183/*IsCS=*/false, 
PGOOpt->AtomicCounterUpdate,1184PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,1185PGOOpt->FS);1186} else if (IsCtxProfGen) {1187MPM.addPass(PGOInstrumentationGen(false));1188addPostPGOLoopRotation(MPM, Level);1189MPM.addPass(PGOCtxProfLoweringPass());1190}11911192if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)1193MPM.addPass(PGOIndirectCallPromotion(false, false));11941195if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)1196MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,1197EnableSampledInstr));11981199if (IsMemprofUse)1200MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));12011202// Synthesize function entry counts for non-PGO compilation.1203if (EnableSyntheticCounts && !PGOOpt)1204MPM.addPass(SyntheticCountsPropagation());12051206if (EnablePGOForceFunctionAttrs && PGOOpt)1207MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));12081209MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));12101211if (EnableModuleInliner)1212MPM.addPass(buildModuleInlinerPipeline(Level, Phase));1213else1214MPM.addPass(buildInlinerPipeline(Level, Phase));12151216// Remove any dead arguments exposed by cleanups, constant folding globals,1217// and argument promotion.1218MPM.addPass(DeadArgumentEliminationPass());12191220MPM.addPass(CoroCleanupPass());12211222// Optimize globals now that functions are fully simplified.1223MPM.addPass(GlobalOptPass());1224MPM.addPass(GlobalDCEPass());12251226return MPM;1227}12281229/// TODO: Should LTO cause any differences to this set of passes?1230void PassBuilder::addVectorPasses(OptimizationLevel Level,1231FunctionPassManager &FPM, bool IsFullLTO) {1232FPM.addPass(LoopVectorizePass(1233LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));12341235if (EnableInferAlignmentPass)1236FPM.addPass(InferAlignmentPass());1237if (IsFullLTO) {1238// The vectorizer may have significantly shortened a loop body; unroll1239// again. 
Unroll small loops to hide loop backedge latency and saturate any1240// parallel execution resources of an out-of-order processor. We also then1241// need to clean up redundancies and loop invariant code.1242// FIXME: It would be really good to use a loop-integrated instruction1243// combiner for cleanup here so that the unrolling and LICM can be pipelined1244// across the loop nests.1245// We do UnrollAndJam in a separate LPM to ensure it happens before unroll1246if (EnableUnrollAndJam && PTO.LoopUnrolling)1247FPM.addPass(createFunctionToLoopPassAdaptor(1248LoopUnrollAndJamPass(Level.getSpeedupLevel())));1249FPM.addPass(LoopUnrollPass(LoopUnrollOptions(1250Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,1251PTO.ForgetAllSCEVInLoopUnroll)));1252FPM.addPass(WarnMissedTransformationsPass());1253// Now that we are done with loop unrolling, be it either by LoopVectorizer,1254// or LoopUnroll passes, some variable-offset GEP's into alloca's could have1255// become constant-offset, thus enabling SROA and alloca promotion. Do so.1256// NOTE: we are very late in the pipeline, and we don't have any LICM1257// or SimplifyCFG passes scheduled after us, that would clean up1258// the CFG mess this may create if allowed to modify CFG, so forbid that. NOTE(review): later in this function a SimplifyCFG pass and a LICM pass are still added unconditionally, so the rationale above may be stale for this path; confirm before relying on it.1259FPM.addPass(SROAPass(SROAOptions::PreserveCFG));1260}12611262if (!IsFullLTO) {1263// Eliminate loads by forwarding stores from the previous iteration to loads1264// of the current iteration.1265FPM.addPass(LoopLoadEliminationPass());1266}1267// Cleanup after the loop optimization passes.1268FPM.addPass(InstCombinePass());12691270if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {1271ExtraVectorPassManager ExtraPasses;1272// At higher optimization levels, try to clean up any runtime overlap and1273// alignment checks inserted by the vectorizer. 
We want to track correlated1274// runtime checks for two inner loops in the same outer loop, fold any1275// common computations, hoist loop-invariant aspects out of any outer loop,1276// and unswitch the runtime checks if possible. Once hoisted, we may have1277// dead (or speculatable) control flows or more combining opportunities.1278ExtraPasses.addPass(EarlyCSEPass());1279ExtraPasses.addPass(CorrelatedValuePropagationPass());1280ExtraPasses.addPass(InstCombinePass());1281LoopPassManager LPM;1282LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,1283/*AllowSpeculation=*/true));1284LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==1285OptimizationLevel::O3));1286ExtraPasses.addPass(1287createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,1288/*UseBlockFrequencyInfo=*/true));1289ExtraPasses.addPass(1290SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));1291ExtraPasses.addPass(InstCombinePass());1292FPM.addPass(std::move(ExtraPasses));1293}12941295// Now that we've formed fast to execute loop structures, we do further1296// optimizations. These are run afterward as they might block doing complex1297// analyses and transforms such as what are needed for loop vectorization.12981299// Cleanup after loop vectorization, etc. 
Simplification passes like CVP and1300// GVN, loop transforms, and others have already run, so it's now better to1301// convert to more optimized IR using more aggressive simplify CFG options.1302// The extra sinking transform can create larger basic blocks, so do this1303// before SLP vectorization.1304FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()1305.forwardSwitchCondToPhi(true)1306.convertSwitchRangeToICmp(true)1307.convertSwitchToLookupTable(true)1308.needCanonicalLoops(false)1309.hoistCommonInsts(true)1310.sinkCommonInsts(true)));13111312if (IsFullLTO) {1313FPM.addPass(SCCPPass());1314FPM.addPass(InstCombinePass());1315FPM.addPass(BDCEPass());1316}13171318// Optimize parallel scalar instruction chains into SIMD instructions.1319if (PTO.SLPVectorization) {1320FPM.addPass(SLPVectorizerPass());1321if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {1322FPM.addPass(EarlyCSEPass());1323}1324}1325// Enhance/cleanup vector code.1326FPM.addPass(VectorCombinePass());13271328if (!IsFullLTO) {1329FPM.addPass(InstCombinePass());1330// Unroll small loops to hide loop backedge latency and saturate any1331// parallel execution resources of an out-of-order processor. 
We also then1332// need to clean up redundancies and loop invariant code.1333// FIXME: It would be really good to use a loop-integrated instruction1334// combiner for cleanup here so that the unrolling and LICM can be pipelined1335// across the loop nests.1336// We do UnrollAndJam in a separate LPM to ensure it happens before unroll1337if (EnableUnrollAndJam && PTO.LoopUnrolling) {1338FPM.addPass(createFunctionToLoopPassAdaptor(1339LoopUnrollAndJamPass(Level.getSpeedupLevel())));1340}1341FPM.addPass(LoopUnrollPass(LoopUnrollOptions(1342Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,1343PTO.ForgetAllSCEVInLoopUnroll)));1344FPM.addPass(WarnMissedTransformationsPass());1345// Now that we are done with loop unrolling, be it either by LoopVectorizer,1346// or LoopUnroll passes, some variable-offset GEP's into alloca's could have1347// become constant-offset, thus enabling SROA and alloca promotion. Do so.1348// NOTE: we are very late in the pipeline, and we don't have any LICM1349// or SimplifyCFG passes scheduled after us, that would clean up1350// the CFG mess this may create if allowed to modify CFG, so forbid that. NOTE(review): a LICM pass is still added later in this function, so the rationale above may be partly stale; confirm before relying on it.1351FPM.addPass(SROAPass(SROAOptions::PreserveCFG));1352}13531354if (EnableInferAlignmentPass)1355FPM.addPass(InferAlignmentPass());1356FPM.addPass(InstCombinePass());13571358// This is needed for two reasons:1359// 1. It works around problems that instcombine introduces, such as sinking1360// expensive FP divides into loops containing multiplications using the1361// divide result.1362// 2. 
It helps to clean up some loop-invariant code created by the loop1363// unroll pass when IsFullLTO=false.1364FPM.addPass(createFunctionToLoopPassAdaptor(1365LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,1366/*AllowSpeculation=*/true),1367/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));13681369// Now that we've vectorized and unrolled loops, we may have more refined1370// alignment information, try to re-derive it here.1371FPM.addPass(AlignmentFromAssumptionsPass());1372}13731374ModulePassManager1375PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,1376ThinOrFullLTOPhase LTOPhase) {1377const bool LTOPreLink = isLTOPreLink(LTOPhase);1378ModulePassManager MPM;13791380// Run partial inlining pass to partially inline functions that have1381// large bodies.1382if (RunPartialInlining)1383MPM.addPass(PartialInlinerPass());13841385// Remove avail extern fns and globals definitions since we aren't compiling1386// an object file for later LTO. For LTO we want to preserve these so they1387// are eligible for inlining at link-time. Note if they are unreferenced they1388// will be removed by GlobalDCE later, so this only impacts referenced1389// available externally globals. Eventually they will be suppressed during1390// codegen, but eliminating here enables more opportunity for GlobalDCE as it1391// may make globals referenced by available external functions dead and saves1392// running remaining passes on the eliminated functions. 
These should be1393// preserved during prelinking for link-time inlining decisions.1394if (!LTOPreLink)1395MPM.addPass(EliminateAvailableExternallyPass());13961397if (EnableOrderFileInstrumentation)1398MPM.addPass(InstrOrderFilePass());13991400// Do RPO function attribute inference across the module to forward-propagate1401// attributes where applicable.1402// FIXME: Is this really an optimization rather than a canonicalization?1403MPM.addPass(ReversePostOrderFunctionAttrsPass());14041405// Do a post inline PGO instrumentation and use pass. This is a context1406// sensitive PGO pass. We don't want to do this in LTOPreLink phase as1407// cross-module inline has not been done yet. The context sensitive1408// instrumentation is after all the inlines are done.1409if (!LTOPreLink && PGOOpt) {1410if (PGOOpt->CSAction == PGOOptions::CSIRInstr)1411addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,1412/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,1413PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,1414PGOOpt->FS);1415else if (PGOOpt->CSAction == PGOOptions::CSIRUse)1416addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,1417/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,1418PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,1419PGOOpt->FS);1420}14211422// Re-compute GlobalsAA here prior to function passes. This is particularly1423// useful as the above will have inlined, DCE'ed, and function-attr1424// propagated everything. We should at this point have a reasonably minimal1425// and richly annotated call graph. 
By computing aliasing and mod/ref1426// information for all local globals here, the late loop passes and notably1427// the vectorizer will be able to use them to help recognize vectorizable1428// memory operations.1429if (EnableGlobalAnalyses)1430MPM.addPass(RecomputeGlobalsAAPass());14311432invokeOptimizerEarlyEPCallbacks(MPM, Level);14331434FunctionPassManager OptimizePM;1435// Scheduling LoopVersioningLICM when inlining is over, because after that1436// we may see more accurate aliasing. Reason to run this late is that too1437// early versioning may prevent further inlining due to increase of code1438// size. Other optimizations which run later might benefit from the no-alias1439// assumption in the cloned loop.1440if (UseLoopVersioningLICM) {1441OptimizePM.addPass(1442createFunctionToLoopPassAdaptor(LoopVersioningLICMPass()));1443// LoopVersioningLICM pass might expose new LICM opportunities.1444OptimizePM.addPass(createFunctionToLoopPassAdaptor(1445LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,1446/*AllowSpeculation=*/true),1447/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));1448}14491450OptimizePM.addPass(Float2IntPass());1451OptimizePM.addPass(LowerConstantIntrinsicsPass());14521453if (EnableMatrix) {1454OptimizePM.addPass(LowerMatrixIntrinsicsPass());1455OptimizePM.addPass(EarlyCSEPass());1456}14571458// CHR pass should only be applied with the profile information.1459// The check is to check the profile summary information in CHR.1460if (EnableCHR && Level == OptimizationLevel::O3)1461OptimizePM.addPass(ControlHeightReductionPass());14621463// FIXME: We need to run some loop optimizations to re-rotate loops after1464// simplifycfg and others undo their rotation.14651466// Optimize the loop execution. 
These passes operate on entire loop nests1467// rather than on each loop in an inside-out manner, and so they are actually1468// function passes.14691470invokeVectorizerStartEPCallbacks(OptimizePM, Level);14711472LoopPassManager LPM;1473// First rotate loops that may have been un-rotated by prior passes.1474// Disable header duplication at -Oz.1475LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||1476Level != OptimizationLevel::Oz,1477LTOPreLink));1478// Some loops may have become dead by now. Try to delete them.1479// FIXME: see discussion in https://reviews.llvm.org/D112851,1480// this may need to be revisited once we run GVN before loop deletion1481// in the simplification pipeline.1482LPM.addPass(LoopDeletionPass());1483OptimizePM.addPass(createFunctionToLoopPassAdaptor(1484std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));14851486// Distribute loops to allow partial vectorization. I.e. isolate dependences1487// into separate loop that would otherwise inhibit vectorization. This is1488// currently only performed for loops marked with the metadata1489// llvm.loop.distribute=true or when -enable-loop-distribute is specified.1490OptimizePM.addPass(LoopDistributePass());14911492// Populates the VFABI attribute with the scalar-to-vector mappings1493// from the TargetLibraryInfo.1494OptimizePM.addPass(InjectTLIMappings());14951496addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);14971498// LoopSink pass sinks instructions hoisted by LICM, which serves as a1499// canonicalization pass that enables other optimizations. As a result,1500// LoopSink pass needs to be a very late IR pass to avoid undoing LICM1501// result too early.1502OptimizePM.addPass(LoopSinkPass());15031504// And finally clean up LCSSA form before generating code.1505OptimizePM.addPass(InstSimplifyPass());15061507// This hoists/decomposes div/rem ops. 
It should run after other sink/hoist1508// passes to avoid re-sinking, but before SimplifyCFG because it can allow1509// flattening of blocks.1510OptimizePM.addPass(DivRemPairsPass());15111512// Try to annotate calls that were created during optimization.1513OptimizePM.addPass(TailCallElimPass());15141515// LoopSink (and other loop passes since the last simplifyCFG) might have1516// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.1517OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()1518.convertSwitchRangeToICmp(true)1519.speculateUnpredictables(true)));15201521// Add the core optimizing pipeline.1522MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),1523PTO.EagerlyInvalidateAnalyses));15241525invokeOptimizerLastEPCallbacks(MPM, Level);15261527// Split out cold code. Splitting is done late to avoid hiding context from1528// other optimizations and inadvertently regressing performance. The tradeoff1529// is that this has a higher code size cost than splitting early.1530if (EnableHotColdSplit && !LTOPreLink)1531MPM.addPass(HotColdSplittingPass());15321533// Search the code for similar regions of code. If enough similar regions can1534// be found where extracting the regions into their own function will decrease1535// the size of the program, we extract the regions, and deduplicate the1536// structurally similar regions.1537if (EnableIROutliner)1538MPM.addPass(IROutlinerPass());15391540// Now we need to do some global optimization transforms.1541// FIXME: It would seem like these should come first in the optimization1542// pipeline and maybe be the bottom of the canonicalization pipeline? Weird1543// ordering here.1544MPM.addPass(GlobalDCEPass());1545MPM.addPass(ConstantMergePass());15461547// Merge functions if requested. 
// It has a better chance to merge functions
  // after ConstantMerge folded jump tables.
  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (PTO.CallGraphProfile && !LTOPreLink)
    MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
                              LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));

  // TODO: Relative lookup table converter pass caused an issue when full lto
  // is enabled. See https://reviews.llvm.org/D94355 for more details.
  // Until the issue is fixed, disable this pass during pre-linking phase.
  if (!LTOPreLink)
    MPM.addPass(RelLookupTableConverterPass());

  return MPM;
}

// Build the default per-module pipeline for the given optimization level.
//
// At O0 this simply forwards to buildO0DefaultPipeline. Otherwise the
// pipeline is: annotation/attribute setup, the pipeline-start EP callbacks,
// the module simplification pipeline, the module optimization pipeline and
// finally the annotation remarks pass. When LTOPreLink is set, both
// sub-pipelines are built for the FullLTOPreLink phase and the required LTO
// pre-link passes are appended at the very end.
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                           bool LTOPreLink) {
  if (Level == OptimizationLevel::O0)
    return buildO0DefaultPipeline(Level, LTOPreLink);

  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Apply module pipeline start EP callback.
  invokePipelineStartEPCallbacks(MPM, Level);

  // The same phase value is handed to both sub-pipelines below.
  const ThinOrFullLTOPhase LTOPhase = LTOPreLink
                                          ? ThinOrFullLTOPhase::FullLTOPreLink
                                          : ThinOrFullLTOPhase::None;
  // Add the core simplification pipeline.
  MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));

  // Now add the optimization pipeline.
  MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));

  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(PseudoProbeUpdatePass());

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(MPM);
  return MPM;
}

// Build the FatLTO pipeline.
//
// The result first runs the ThinLTO or full LTO pre-link pipeline (selected
// by ThinLTO), then EmbedBitcodePass, and then continues optimizing the
// module: with the ThinLTO post-link pipeline when ThinLTO is combined with
// sample-profile use, and with the plain module optimization pipeline
// otherwise.
ModulePassManager
PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
                                        bool EmitSummary) {
  ModulePassManager MPM;
  if (ThinLTO)
    MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
  else
    MPM.addPass(buildLTOPreLinkDefaultPipeline(Level));
  MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));

  // Use the ThinLTO post-link pipeline with sample profiling
  if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
  else {
    // otherwise, just use module optimization
    MPM.addPass(
        buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);
  }
  return MPM;
}

// Build the ThinLTO pre-link pipeline.
//
// At O0 this forwards to buildO0DefaultPipeline with LTOPreLink set.
// Otherwise only the module simplification pipeline is run (no module
// optimization pipeline), followed by the optimizer early/last EP callbacks,
// the annotation remarks pass and the required LTO pre-link passes.
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  if (Level == OptimizationLevel::O0)
    return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);

  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Apply module pipeline start EP callback.
  invokePipelineStartEPCallbacks(MPM, Level);

  // If we are planning to perform ThinLTO later, we don't bloat the code with
  // unrolling/vectorization/... now. Just simplify the module as much as we
  // can.
  MPM.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPreLink));

  // Run partial inlining pass to partially inline functions that have
  // large bodies.
  // FIXME: It isn't clear whether this is really the right place to run this
  // in ThinLTO. Because there is another canonicalization and simplification
  // phase that will run after the thin link, running this here ends up with
  // less information than will be available later and it may grow functions in
  // ways that aren't beneficial.
  if (RunPartialInlining)
    MPM.addPass(PartialInlinerPass());

  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(PseudoProbeUpdatePass());

  // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
  // optimization is going to be done in PostLink stage, but clang can't add
  // callbacks there in case of in-process ThinLTO called by linker.
  invokeOptimizerEarlyEPCallbacks(MPM, Level);
  invokeOptimizerLastEPCallbacks(MPM, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  addRequiredLTOPreLinkPasses(MPM);

  return MPM;
}

// Build the ThinLTO post-link pipeline.
//
// ImportSummary may be null (buildFatLTODefaultPipeline passes nullptr); the
// summary-driven passes are only added when it is non-null. At O0 the
// pipeline stops after a minimal cleanup; otherwise the module simplification
// and optimization pipelines are run for the ThinLTOPostLink phase.
ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
    OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
  ModulePassManager MPM;

  if (ImportSummary) {
    // For ThinLTO we must apply the context disambiguation decisions early, to
    // ensure we can correctly match the callsites to summary data.
    if (EnableMemProfContextDisambiguation)
      MPM.addPass(MemProfContextDisambiguation(ImportSummary));

    // These passes import type identifier resolutions for whole-program
    // devirtualization and CFI. They must run early because other passes may
    // disturb the specific instruction patterns that these passes look for,
    // creating dependencies on resolutions that may not appear in the summary.
    //
    // For example, GVN may transform the pattern assume(type.test) appearing in
    // two basic blocks into assume(phi(type.test, type.test)), which would
    // transform a dependency on a WPD resolution into a dependency on a type
    // identifier resolution for CFI.
    //
    // Also, WPD has access to more precise information than ICP and can
    // devirtualize more effectively, so it should operate on the IR first.
    //
    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
  }

  if (Level == OptimizationLevel::O0) {
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP.
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
    // Drop available_externally and unreferenced globals. This is necessary
    // with ThinLTO in order to avoid leaving undefined references to dead
    // globals in the object file.
    MPM.addPass(EliminateAvailableExternallyPass());
    MPM.addPass(GlobalDCEPass());
    return MPM;
  }

  // Add the core simplification pipeline.
  MPM.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPostLink));

  // Now add the optimization pipeline.
  MPM.addPass(buildModuleOptimizationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPostLink));

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  return MPM;
}

// Build the full LTO pre-link pipeline. This is currently just the
// per-module default pipeline with LTOPreLink set.
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  // FIXME: We should use a customized pre-link pipeline!
  return buildPerModuleDefaultPipeline(Level,
                                       /* LTOPreLink */ true);
}

// Build the full LTO post-link pipeline.
//
// There are three exits: an O0 exit that only lowers type metadata, an O1
// exit right after whole-program devirtualization, and the full pipeline for
// higher levels. Every exit invokes the FullLinkTimeOptimizationLast EP
// callbacks and adds the annotation remarks pass before returning.
ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                                     ModuleSummaryIndex *ExportSummary) {
  ModulePassManager MPM;

  invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);

  // Create a function that performs CFI checks for cross-DSO calls with targets
  // in the current module.
  MPM.addPass(CrossDSOCFIPass());

  if (Level == OptimizationLevel::O0) {
    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP.
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

    invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);

    return MPM;
  }

  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
    // Load sample profile before running the LTO optimization pipeline.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile,
                                        ThinOrFullLTOPhase::FullLTOPostLink));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  }

  // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
  MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));

  // Remove unused virtual tables to improve the quality of code generated by
  // whole-program devirtualization and bitset lowering.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  // Do basic inference of function attributes from known properties of system
  // libraries and other oracles.
  MPM.addPass(InferFunctionAttrsPass());

  if (Level.getSpeedupLevel() > 1) {
    MPM.addPass(createModuleToFunctionPassAdaptor(
        CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));

    // Indirect call promotion. This should promote all the targets that are
    // left by the earlier promotion pass that promotes intra-module targets.
    // This two-step promotion is to save the compile time. For LTO, it should
    // produce the same result as if we only do promotion here.
    MPM.addPass(PGOIndirectCallPromotion(
        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));

    // Propagate constants at call sites into the functions they call. This
    // opens opportunities for globalopt (and inlining) by substituting function
    // pointers passed as arguments to direct uses of functions.
    MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
                                         Level != OptimizationLevel::Os &&
                                         Level != OptimizationLevel::Oz)));

    // Attach metadata to indirect call sites indicating the set of functions
    // they may target at run-time. This should follow IPSCCP.
    MPM.addPass(CalledValuePropagationPass());
  }

  // Now deduce any function attributes based on the current code.
  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Do RPO function attribute inference across the module to forward-propagate
  // attributes where applicable.
  // FIXME: Is this really an optimization rather than a canonicalization?
  MPM.addPass(ReversePostOrderFunctionAttrsPass());

  // Use in-range annotations on GEP indices to split globals where beneficial.
  MPM.addPass(GlobalSplitPass());

  // Run whole program optimization of virtual call when the list of callees
  // is fixed.
  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));

  // Stop here at -O1.
  if (Level == OptimizationLevel::O1) {
    // The LowerTypeTestsPass needs to run to lower type metadata and the
    // type.test intrinsics. The pass does nothing if CFI is disabled.
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP (which is performed earlier than this in the regular LTO
    // pipeline).
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

    invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);

    return MPM;
  }

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Promote any localized globals to SSA registers.
  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));

  // Linking modules together can lead to duplicate global constants, only
  // keep one copy of each constant.
  MPM.addPass(ConstantMergePass());

  // Remove unused arguments from functions.
  MPM.addPass(DeadArgumentEliminationPass());

  // Reduce the code after globalopt and ipsccp. Both can open up significant
  // simplification opportunities, and both can propagate functions through
  // function pointers. When this happens, we often have to resolve varargs
  // calls, etc, so let instcombine do this.
  FunctionPassManager PeepholeFPM;
  PeepholeFPM.addPass(InstCombinePass());
  if (Level.getSpeedupLevel() > 1)
    PeepholeFPM.addPass(AggressiveInstCombinePass());
  invokePeepholeEPCallbacks(PeepholeFPM, Level);

  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Note: historically, the PruneEH pass was run first to deduce nounwind and
  // generally clean up exception handling overhead. It isn't clear this is
  // valuable as the inliner doesn't currently care whether it is inlining an
  // invoke or a call.
  // Run the inliner now.
  if (EnableModuleInliner) {
    MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
                                  UseInlineAdvisor,
                                  ThinOrFullLTOPhase::FullLTOPostLink));
  } else {
    MPM.addPass(ModuleInlinerWrapperPass(
        getInlineParamsFromOptLevel(Level),
        /* MandatoryFirst */ true,
        InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
                      InlinePass::CGSCCInliner}));
  }

  // Perform context disambiguation after inlining, since that would reduce the
  // amount of additional cloning required to distinguish the allocation
  // contexts.
  if (EnableMemProfContextDisambiguation)
    MPM.addPass(MemProfContextDisambiguation());

  // Optimize globals again after we ran the inliner.
  MPM.addPass(GlobalOptPass());

  // Run the OpenMPOpt pass again after global optimizations.
  MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));

  // Garbage collect dead functions.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  // If we didn't decide to inline a function, check to see if we can
  // transform it to pass arguments by value instead of by reference.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));

  FunctionPassManager FPM;
  // The IPO Passes may leave cruft around. Clean up after them.
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  if (EnableConstraintElimination)
    FPM.addPass(ConstraintEliminationPass());

  FPM.addPass(JumpThreadingPass());

  // Do a post inline PGO instrumentation and use pass. This is a context
  // sensitive PGO pass.
  // Note: these passes are added to MPM directly while FPM is still being
  // populated, so in the final pipeline they are scheduled before every pass
  // in FPM (FPM is only handed to MPM further below).
  if (PGOOpt) {
    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                        PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
                        PGOOpt->FS);
    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                        PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
                        PGOOpt->FS);
  }

  // Break up allocas
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // LTO provides additional opportunities for tailcall elimination due to
  // link-time inlining, and visibility of nocapture attribute.
  FPM.addPass(TailCallElimPass());

  // Run a few AA driver optimizations here and now to cleanup the code.
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
                                                PTO.EagerlyInvalidateAnalyses));

  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Require the GlobalsAA analysis for the module so we can query it within
  // MainFPM.
  if (EnableGlobalAnalyses) {
    MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
    // Invalidate AAManager so it can be recreated and pick up the newly
    // available GlobalsAA.
    MPM.addPass(
        createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
  }

  FunctionPassManager MainFPM;
  MainFPM.addPass(createFunctionToLoopPassAdaptor(
      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));

  if (RunNewGVN)
    MainFPM.addPass(NewGVNPass());
  else
    MainFPM.addPass(GVNPass());

  // Remove dead memcpy()'s.
  MainFPM.addPass(MemCpyOptPass());

  // Nuke dead stores.
  MainFPM.addPass(DSEPass());
  MainFPM.addPass(MoveAutoInitPass());
  MainFPM.addPass(MergedLoadStoreMotionPass());

  LoopPassManager LPM;
  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
    LPM.addPass(LoopFlattenPass());
  LPM.addPass(IndVarSimplifyPass());
  LPM.addPass(LoopDeletionPass());
  // FIXME: Add loop interchange.

  // Unroll small loops and perform peeling.
  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                 PTO.ForgetAllSCEVInLoopUnroll));
  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
  // *All* loop passes must preserve it, in order to be able to use it.
  MainFPM.addPass(createFunctionToLoopPassAdaptor(
      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));

  MainFPM.addPass(LoopDistributePass());

  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);

  // Run the OpenMPOpt CGSCC pass again late.
  // Note: this is added to MPM before MainFPM is handed over below, so it is
  // scheduled ahead of all MainFPM passes in the final pipeline.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
      OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));

  invokePeepholeEPCallbacks(MainFPM, Level);
  MainFPM.addPass(JumpThreadingPass());
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Lower type metadata and the type.test intrinsic. This pass supports
  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
  // to be run at link time if CFI is enabled. This pass does nothing if
  // CFI is disabled.
  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
  // Run a second time to clean up any type tests left behind by WPD for use
  // in ICP (which is performed earlier than this in the regular LTO pipeline).
  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

  // Enable splitting late in the FullLTO post-link pipeline.
  if (EnableHotColdSplit)
    MPM.addPass(HotColdSplittingPass());

  // Add late LTO optimization passes.
  FunctionPassManager LateFPM;

  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
  // canonicalization pass that enables other optimizations. As a result,
  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
  // result too early.
  LateFPM.addPass(LoopSinkPass());

  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
  // flattening of blocks.
  LateFPM.addPass(DivRemPairsPass());

  // Delete basic blocks, which optimization passes may have killed.
  LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                      .convertSwitchRangeToICmp(true)
                                      .hoistCommonInsts(true)
                                      .speculateUnpredictables(true)));
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));

  // Drop bodies of available externally objects to improve GlobalDCE.
  MPM.addPass(EliminateAvailableExternallyPass());

  // Now that we have optimized the program, discard unreachable functions.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (PTO.CallGraphProfile)
    MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));

  invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  return MPM;
}

ModulePassManager
PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
                                    bool LTOPreLink) {
  // Builds the minimal module pipeline used at O0. Level must be
  // OptimizationLevel::O0 (asserted below). No optimization pipeline is run;
  // only instrumentation, always-inlining, a few optional lowering passes,
  // the registered extension-point callbacks and coroutine lowering are
  // added. When LTOPreLink is set, the required LTO pre-link passes are
  // appended as well.
  assert(Level == OptimizationLevel::O0 &&
         "buildO0DefaultPipeline should only be used with O0");

  ModulePassManager MPM;

  // Perform pseudo probe instrumentation in O0 mode. This is for the
  // consistency between different build modes. For example, a LTO build can be
  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
  // the postlink will require pseudo probe instrumentation in the prelink.
  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
    MPM.addPass(SampleProfileProbePass(TM));

  // IR-level PGO: instrument (IRInstr) or consume a profile (IRUse).
  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
                 PGOOpt->Action == PGOOptions::IRUse))
    addPGOInstrPassesForO0(
        MPM,
        /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
        /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
        PGOOpt->ProfileRemappingFile, PGOOpt->FS);

  // Instrument function entry and exit before all inlining.
  MPM.addPass(createModuleToFunctionPassAdaptor(
      EntryExitInstrumenterPass(/*PostInlining=*/false)));

  invokePipelineStartEPCallbacks(MPM, Level);

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  invokePipelineEarlySimplificationEPCallbacks(MPM, Level);

  // Build a minimal pipeline based on the semantics required by LLVM,
  // which is just that always inlining occurs. Further, disable generating
  // lifetime intrinsics to avoid enabling further optimizations during
  // code generation.
  MPM.addPass(AlwaysInlinerPass(
      /*InsertLifetimeIntrinsics=*/false));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (EnableMatrix)
    MPM.addPass(
        createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));

  // The extension-point callbacks below are still honoured at O0. For the
  // CGSCC/loop/function-level ones, a temporary pass manager is filled by the
  // callbacks and is only added to MPM if a callback actually put something
  // in it.
  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
    CGSCCPassManager CGPM;
    invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
    if (!CGPM.isEmpty())
      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  }
  if (!LateLoopOptimizationsEPCallbacks.empty()) {
    LoopPassManager LPM;
    invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
    if (!LPM.isEmpty()) {
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(LPM))));
    }
  }
  if (!LoopOptimizerEndEPCallbacks.empty()) {
    LoopPassManager LPM;
    invokeLoopOptimizerEndEPCallbacks(LPM, Level);
    if (!LPM.isEmpty()) {
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(LPM))));
    }
  }
  if (!ScalarOptimizerLateEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeScalarOptimizerLateEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  invokeOptimizerEarlyEPCallbacks(MPM, Level);

  if (!VectorizerStartEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeVectorizerStartEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  // Coroutine lowering: the passes are collected in their own module pass
  // manager, which is then wrapped in CoroConditionalWrapper.
  ModulePassManager CoroPM;
  CoroPM.addPass(CoroEarlyPass());
  CGSCCPassManager CGPM;
  CGPM.addPass(CoroSplitPass());
  CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  CoroPM.addPass(CoroCleanupPass());
  CoroPM.addPass(GlobalDCEPass());
  MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));

  invokeOptimizerLastEPCallbacks(MPM, Level);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(MPM);

  // Emit annotation remarks.
  MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));

  return MPM;
}

// Build the default alias analysis pipeline: BasicAA, ScopedNoAliasAA and
// TypeBasedAA, optionally GlobalsAA (when EnableGlobalAnalyses is set), and
// any target-specific analyses registered by the TargetMachine, if one is
// available.
AAManager PassBuilder::buildDefaultAAPipeline() {
  AAManager AA;

  // The order in which these are registered determines their priority when
  // being queried.

  // First we register the basic alias analysis that provides the majority of
  // per-function local AA logic. This is a stateless, on-demand local set of
  // AA techniques.
  AA.registerFunctionAnalysis<BasicAA>();

  // Next we query fast, specialized alias analyses that wrap IR-embedded
  // information about aliasing.
  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
  AA.registerFunctionAnalysis<TypeBasedAA>();

  // Add support for querying global aliasing information when available.
  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
  // analysis, all that the `AAManager` can do is query for any *cached*
  // results from `GlobalsAA` through a readonly proxy.
  if (EnableGlobalAnalyses)
    AA.registerModuleAnalysis<GlobalsAA>();

  // Add target-specific alias analyses.
  if (TM)
    TM->registerDefaultAliasAnalyses(AA);

  return AA;
}