//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the PassManagerBuilder class, which is used to set up a // "standard" optimization sequence suitable for languages like C and C++. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Vectorize.h" using namespace llvm; PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; DisableUnrollLoops = false; SLPVectorize = false; LoopVectorize = true; LoopsInterleaved = true; LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; DisableGVNLoadPRE = false; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; VerifyInput = false; VerifyOutput = false; DivergentTarget = false; CallGraphProfile = true; } PassManagerBuilder::~PassManagerBuilder() { delete LibraryInfo; delete Inliner; } void PassManagerBuilder::addInitialAliasAnalysisPasses( legacy::PassManagerBase &PM) const { // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. PM.add(createTypeBasedAAWrapperPass()); PM.add(createScopedNoAliasAAWrapperPass()); } void PassManagerBuilder::populateFunctionPassManager( legacy::FunctionPassManager &FPM) { // Add LibraryInfo if we have some. if (LibraryInfo) FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); if (OptLevel == 0) return; addInitialAliasAnalysisPasses(FPM); // Lower llvm.expect to metadata before attempting transforms. // Compare/branch metadata may alter the behavior of passes like SimplifyCFG. FPM.add(createLowerExpectIntrinsicPass()); FPM.add(createCFGSimplificationPass()); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); } void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies if (OptLevel > 1) { // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); } MPM.add( createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( true))); // Merge & remove BBs // Combine silly seq's MPM.add(createInstructionCombiningPass()); // TODO: Investigate the cost/benefit of tail call elimination on debugging. if (OptLevel > 1) MPM.add(createTailCallEliminationPass()); // Eliminate tail calls MPM.add( createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( true))); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // Begin the loop pass pipeline. // The simple loop unswitch pass relies on separate cleanup passes. Schedule // them first so when we re-process a loop they run before other loop // passes. MPM.add(createLoopInstSimplifyPass()); MPM.add(createLoopSimplifyCFGPass()); // Try to remove as much code from the loop header as possible, // to reduce amount of IR that will have to be duplicated. However, // do not perform speculative hoisting the first time as LICM // will destroy metadata that may not need to be destroyed if run // after loop rotation. // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/false)); // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true)); MPM.add(createSimpleLoopUnswitchLegacyPass(OptLevel == 3)); // FIXME: We break the loop pass pipeline here in order to do full // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the // need for this. MPM.add(createCFGSimplificationPass( SimplifyCFGOptions().convertSwitchRangeToICmp(true))); MPM.add(createInstructionCombiningPass()); // We resume loop passes creating a second loop pipeline here. // Unroll small loops and perform peeling. MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); // This ends the loop pass pipelines. // Break up allocas that may now be splittable after loop unrolling. MPM.add(createSROAPass()); if (OptLevel > 1) { MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies } // Delete dead bit computations (instcombine runs after to fold away the dead // computations, and then ADCE will run later to exploit any new DCE // opportunities that creates). MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. MPM.add(createInstructionCombiningPass()); MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset // TODO: Investigate if this is too expensive at O1. if (OptLevel > 1) { MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true)); } // Merge & remove BBs and sink & hoist common instructions. MPM.add(createCFGSimplificationPass( SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); // Clean up after everything. MPM.add(createInstructionCombiningPass()); } /// FIXME: Should LTO cause any differences to this set of passes? void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, bool IsFullLTO) { if (IsFullLTO) { // The vectorizer may have significantly shortened a loop body; unroll // again. Unroll small loops to hide loop backedge latency and saturate any // parallel execution resources of an out-of-order processor. We also then // need to clean up redundancies and loop invariant code. // FIXME: It would be really good to use a loop-integrated instruction // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); } // Cleanup after the loop optimization passes. PM.add(createInstructionCombiningPass()); // Now that we've formed fast to execute loop structures, we do further // optimizations. These are run afterward as they might block doing complex // analyses and transforms such as what are needed for loop vectorization. // Cleanup after loop vectorization, etc. Simplification passes like CVP and // GVN, loop transforms, and others have already run, so it's now better to // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. PM.add(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) .sinkCommonInsts(true))); if (IsFullLTO) { PM.add(createInstructionCombiningPass()); // Clean up again PM.add(createBitTrackingDCEPass()); } if (!IsFullLTO) { PM.add(createInstructionCombiningPass()); // Unroll small loops PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); if (!DisableUnrollLoops) { // LoopUnroll may generate some redundency to cleanup. PM.add(createInstructionCombiningPass()); // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true)); } } // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. PM.add(createAlignmentFromAssumptionsPass()); if (IsFullLTO) PM.add(createInstructionCombiningPass()); } void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { // If all optimizations are disabled, just run the always-inline pass and, // if enabled, the function merging pass. if (OptLevel == 0) { if (Inliner) { MPM.add(Inliner); Inliner = nullptr; } return; } // Add LibraryInfo if we have some. if (LibraryInfo) MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); addInitialAliasAnalysisPasses(MPM); if (OptLevel > 2) MPM.add(createCallSiteSplittingPass()); // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); MPM.add(createDeadArgEliminationPass()); // Dead argument elimination MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE MPM.add( createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( true))); // Clean up after IPCP & DAE // We add a module alias analysis pass here. In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive // for the entire SCC pass run below. MPM.add(createGlobalsAAWrapperPass()); // Start of CallGraph SCC passes. if (Inliner) { MPM.add(Inliner); Inliner = nullptr; } addFunctionSimplificationPasses(MPM); // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); // We add a fresh GlobalsModRef run at this point. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. We should at this point have a reasonably minimal // and richly annotated call graph. By computing aliasing and mod/ref // information for all local globals here, the late loop passes and notably // the vectorizer will be able to use them to help recognize vectorizable // memory operations. // // Note that this relies on a bug in the pass manager which preserves // a module analysis into a function pass pipeline (and throughout it) so // long as the first function pass doesn't invalidate the module analysis. // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for // this to work. Fortunately, it is trivial to preserve AliasAnalysis // (doing nothing preserves it as it is required to be conservatively // correct in the face of IR changes). MPM.add(createGlobalsAAWrapperPass()); MPM.add(createFloat2IntPass()); MPM.add(createLowerConstantIntrinsicsPass()); // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); addVectorPasses(MPM, /* IsFullLTO */ false); // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM // result too early. MPM.add(createLoopSinkPass()); // Get rid of LCSSA nodes. MPM.add(createInstSimplifyLegacyPass()); // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. MPM.add(createCFGSimplificationPass( SimplifyCFGOptions().convertSwitchRangeToICmp(true))); }