// DFAJumpThreading, JumpThreading, LibCallsShrink, LoopVectorize, SLPVectorizer, DeadStoreElimination, AggressiveDCE, CorrelatedValuePropagation and IndVarSimplify are part of the optimization pipeline, of which the legacy version is deprecated and being removed.
344 lines
14 KiB
C++
344 lines
14 KiB
C++
//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the PassManagerBuilder class, which is used to set up a
|
|
// "standard" optimization sequence suitable for languages like C and C++.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/Analysis/GlobalsModRef.h"
|
|
#include "llvm/Analysis/ScopedNoAliasAA.h"
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
|
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/ManagedStatic.h"
|
|
#include "llvm/Target/CGPassBuilderOption.h"
|
|
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
|
|
#include "llvm/Transforms/IPO.h"
|
|
#include "llvm/Transforms/IPO/Attributor.h"
|
|
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
|
|
#include "llvm/Transforms/IPO/FunctionAttrs.h"
|
|
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
|
|
#include "llvm/Transforms/InstCombine/InstCombine.h"
|
|
#include "llvm/Transforms/Instrumentation.h"
|
|
#include "llvm/Transforms/Scalar.h"
|
|
#include "llvm/Transforms/Scalar/GVN.h"
|
|
#include "llvm/Transforms/Scalar/LICM.h"
|
|
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
|
|
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
|
|
#include "llvm/Transforms/Utils.h"
|
|
#include "llvm/Transforms/Vectorize.h"
|
|
|
|
using namespace llvm;
|
|
|
|
/// Construct a builder with the default configuration: optimization level 2,
/// no size optimization, no library info and no inliner attached.
PassManagerBuilder::PassManagerBuilder() {
  // Optimization / size levels.
  OptLevel = 2;
  SizeLevel = 0;

  // Optional objects supplied later by the client; the destructor deletes
  // whatever is still attached.
  LibraryInfo = nullptr;
  Inliner = nullptr;

  // Loop and vectorization switches.
  LoopVectorize = true;
  LoopsInterleaved = true;
  SLPVectorize = false;
  DisableUnrollLoops = false;
  DisableGVNLoadPRE = false;

  // Tunables seeded from values declared outside this file.
  LicmMssaOptCap = SetLicmMssaOptCap;
  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;

  // Verification is off by default.
  VerifyInput = false;
  VerifyOutput = false;

  // Target / profile related defaults.
  DivergentTarget = false;
  CallGraphProfile = true;
}
|
|
|
|
/// Release the objects the builder still holds.
///
/// Inliner is reset to nullptr by populateModulePassManager() once it has been
/// added to a pass manager, in which case the second delete is a no-op.
PassManagerBuilder::~PassManagerBuilder() {
  delete LibraryInfo;
  delete Inliner;
}
|
|
|
|
/// Register the alias-analysis wrapper passes that every optimizing pipeline
/// built here starts with.
///
/// \param PM pass manager that receives the two wrapper passes.
void PassManagerBuilder::addInitialAliasAnalysisPasses(
    legacy::PassManagerBase &PM) const {
  // TypeBasedAliasAnalysis is added before BasicAliasAnalysis so that
  // BasicAliasAnalysis wins when the two disagree; this is meant to keep
  // "obvious" type-punning idioms working.
  auto *TypeBasedAA = createTypeBasedAAWrapperPass();
  PM.add(TypeBasedAA);

  auto *ScopedNoAliasAA = createScopedNoAliasAAWrapperPass();
  PM.add(ScopedNoAliasAA);
}
|
|
|
|
/// Fill \p FPM with the early, per-function cleanup passes.
///
/// The target library info wrapper is added whenever LibraryInfo is set, even
/// at OptLevel 0; every other pass is only scheduled when OptLevel is nonzero.
///
/// \param FPM function pass manager to populate.
void PassManagerBuilder::populateFunctionPassManager(
    legacy::FunctionPassManager &FPM) {
  // Library info, when provided, goes in first.
  if (LibraryInfo != nullptr) {
    FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
  }

  // Nothing else is scheduled when optimizations are disabled.
  if (OptLevel == 0) {
    return;
  }

  addInitialAliasAnalysisPasses(FPM);

  // llvm.expect is lowered to metadata before any transform runs, because
  // compare/branch metadata may alter the behavior of passes like SimplifyCFG.
  FPM.add(createLowerExpectIntrinsicPass());

  // Early cleanup: simplify the CFG, split up allocas, remove trivial
  // redundancies.
  FPM.add(createCFGSimplificationPass());
  FPM.add(createSROAPass());
  FPM.add(createEarlyCSEPass());
}
|
|
|
|
/// Append the main function simplification pipeline to \p MPM.
///
/// Must only be called with OptLevel >= 1 (asserted). Several passes are only
/// scheduled when OptLevel > 1, and non-trivial loop unswitching is requested
/// only at OptLevel == 3.
///
/// \param MPM pass manager that receives the passes, in execution order.
void PassManagerBuilder::addFunctionSimplificationPasses(
    legacy::PassManagerBase &MPM) {
  assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!");

  const bool AboveO1 = OptLevel > 1;

  // SimplifyCFG configured to turn switch ranges into icmp; used repeatedly
  // below to merge and remove basic blocks.
  auto AddSwitchRangeSimplifyCFG = [&MPM] {
    auto Opts = SimplifyCFGOptions();
    Opts.convertSwitchRangeToICmp(true);
    MPM.add(createCFGSimplificationPass(Opts));
  };

  // LICM with the builder's MemorySSA caps; only the speculation flag varies.
  // TODO: Investigate promotion cap for O1.
  auto AddLICM = [this, &MPM](bool AllowSpeculation) {
    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                           AllowSpeculation));
  };

  // Start of function passes.
  // Break up aggregate allocas, using SSAUpdater.
  MPM.add(createSROAPass());
  // Catch trivial redundancies (MemorySSA-backed EarlyCSE).
  MPM.add(createEarlyCSEPass(/*UseMemorySSA=*/true));

  if (AboveO1) {
    // Speculative execution if the target has divergent branches; otherwise
    // this is a nop.
    MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
  }

  AddSwitchRangeSimplifyCFG(); // Merge & remove BBs
  // Combine silly sequences.
  MPM.add(createInstructionCombiningPass());

  // TODO: Investigate the cost/benefit of tail call elimination on debugging.
  if (AboveO1) {
    MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
  }
  AddSwitchRangeSimplifyCFG();      // Merge & remove BBs
  MPM.add(createReassociatePass()); // Reassociate expressions

  // ---- First loop pass pipeline ----

  // The simple loop unswitch pass relies on separate cleanup passes. They are
  // scheduled first so that, when a loop is re-processed, they run before the
  // other loop passes.
  MPM.add(createLoopInstSimplifyPass());
  MPM.add(createLoopSimplifyCFGPass());

  // Remove as much code from the loop header as possible to reduce the amount
  // of IR that has to be duplicated. Speculative hoisting is not performed
  // this first time, since LICM would destroy metadata that may not need to be
  // destroyed if it runs after loop rotation.
  AddLICM(/*AllowSpeculation=*/false);

  // Rotate loops; header duplication is disabled at -Oz (SizeLevel == 2).
  const int RotateHeaderLimit = SizeLevel == 2 ? 0 : -1;
  MPM.add(createLoopRotatePass(RotateHeaderLimit, false));

  AddLICM(/*AllowSpeculation=*/true);

  const bool UnswitchNonTrivial = OptLevel == 3;
  MPM.add(createSimpleLoopUnswitchLegacyPass(UnswitchNonTrivial));

  // FIXME: We break the loop pass pipeline here in order to do full
  // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
  // need for this.
  AddSwitchRangeSimplifyCFG();
  MPM.add(createInstructionCombiningPass());

  // ---- Second loop pass pipeline ----

  // Unroll small loops and perform peeling.
  MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
                                     ForgetAllSCEVInLoopUnroll));
  // End of the loop pass pipelines.

  // Break up allocas that may now be splittable after loop unrolling.
  MPM.add(createSROAPass());

  if (AboveO1) {
    MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
    MPM.add(createGVNPass(DisableGVNLoadPRE));  // Remove redundancies
  }

  // Delete dead bit computations. Instcombine runs afterwards to fold away the
  // dead computations, and ADCE will run later to exploit any new DCE
  // opportunities that creates.
  MPM.add(createBitTrackingDCEPass());

  // Instcombine after redundancy elimination exploits the opportunities it
  // opened up.
  MPM.add(createInstructionCombiningPass());

  MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset

  // TODO: Investigate if this is too expensive at O1.
  if (AboveO1) {
    AddLICM(/*AllowSpeculation=*/true);
  }

  // Merge & remove BBs and sink & hoist common instructions.
  auto HoistSinkOpts = SimplifyCFGOptions();
  HoistSinkOpts.hoistCommonInsts(true).sinkCommonInsts(true);
  MPM.add(createCFGSimplificationPass(HoistSinkOpts));

  // Clean up after everything.
  MPM.add(createInstructionCombiningPass());
}
|
|
|
|
/// FIXME: Should LTO cause any differences to this set of passes?
|
|
void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
|
|
bool IsFullLTO) {
|
|
if (IsFullLTO) {
|
|
// The vectorizer may have significantly shortened a loop body; unroll
|
|
// again. Unroll small loops to hide loop backedge latency and saturate any
|
|
// parallel execution resources of an out-of-order processor. We also then
|
|
// need to clean up redundancies and loop invariant code.
|
|
// FIXME: It would be really good to use a loop-integrated instruction
|
|
// combiner for cleanup here so that the unrolling and LICM can be pipelined
|
|
// across the loop nests.
|
|
PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
|
|
ForgetAllSCEVInLoopUnroll));
|
|
}
|
|
|
|
// Cleanup after the loop optimization passes.
|
|
PM.add(createInstructionCombiningPass());
|
|
|
|
// Now that we've formed fast to execute loop structures, we do further
|
|
// optimizations. These are run afterward as they might block doing complex
|
|
// analyses and transforms such as what are needed for loop vectorization.
|
|
|
|
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
|
|
// GVN, loop transforms, and others have already run, so it's now better to
|
|
// convert to more optimized IR using more aggressive simplify CFG options.
|
|
// The extra sinking transform can create larger basic blocks, so do this
|
|
// before SLP vectorization.
|
|
PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
|
|
.forwardSwitchCondToPhi(true)
|
|
.convertSwitchRangeToICmp(true)
|
|
.convertSwitchToLookupTable(true)
|
|
.needCanonicalLoops(false)
|
|
.hoistCommonInsts(true)
|
|
.sinkCommonInsts(true)));
|
|
|
|
if (IsFullLTO) {
|
|
PM.add(createInstructionCombiningPass()); // Clean up again
|
|
PM.add(createBitTrackingDCEPass());
|
|
}
|
|
|
|
if (!IsFullLTO) {
|
|
PM.add(createInstructionCombiningPass());
|
|
|
|
// Unroll small loops
|
|
PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
|
|
ForgetAllSCEVInLoopUnroll));
|
|
|
|
if (!DisableUnrollLoops) {
|
|
// LoopUnroll may generate some redundency to cleanup.
|
|
PM.add(createInstructionCombiningPass());
|
|
|
|
// Runtime unrolling will introduce runtime check in loop prologue. If the
|
|
// unrolled loop is a inner loop, then the prologue will be inside the
|
|
// outer loop. LICM pass can help to promote the runtime check out if the
|
|
// checked value is loop invariant.
|
|
PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
|
|
/*AllowSpeculation=*/true));
|
|
}
|
|
}
|
|
|
|
// After vectorization and unrolling, assume intrinsics may tell us more
|
|
// about pointer alignments.
|
|
PM.add(createAlignmentFromAssumptionsPass());
|
|
|
|
if (IsFullLTO)
|
|
PM.add(createInstructionCombiningPass());
|
|
}
|
|
|
|
/// Fill \p MPM with the module-level optimization pipeline.
///
/// At OptLevel 0 only the client-provided inliner (if any) is scheduled.
/// Otherwise the pipeline is: library info and alias analyses, early
/// interprocedural cleanup, the inliner, the function simplification
/// pipeline, and finally the late loop / vector / cleanup passes.
///
/// Once the inliner has been added to \p MPM the builder drops its pointer, so
/// the destructor will not delete it.
///
/// \param MPM pass manager to populate.
void PassManagerBuilder::populateModulePassManager(
    legacy::PassManagerBase &MPM) {
  // Add the inliner to MPM, if there is one, and forget about it.
  auto HandOffInliner = [this, &MPM] {
    if (!Inliner)
      return;
    MPM.add(Inliner);
    Inliner = nullptr;
  };

  // SimplifyCFG configured to turn switch ranges into icmp.
  auto AddSwitchRangeSimplifyCFG = [&MPM] {
    auto Opts = SimplifyCFGOptions();
    Opts.convertSwitchRangeToICmp(true);
    MPM.add(createCFGSimplificationPass(Opts));
  };

  // With all optimizations disabled, the only thing scheduled is the inliner
  // supplied by the client (presumably the always-inline pass).
  if (OptLevel == 0) {
    HandOffInliner();
    return;
  }

  // Add LibraryInfo if we have some.
  if (LibraryInfo != nullptr) {
    MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
  }

  addInitialAliasAnalysisPasses(MPM);

  if (OptLevel > 2) {
    MPM.add(createCallSiteSplittingPass());
  }

  // Promote any localized global vars.
  MPM.add(createPromoteMemoryToRegisterPass());

  MPM.add(createDeadArgEliminationPass()); // Dead argument elimination

  // Clean up after DAE (and any earlier interprocedural work).
  MPM.add(createInstructionCombiningPass());
  AddSwitchRangeSimplifyCFG();

  // A module alias analysis pass is added here. In part due to bugs in the
  // analysis infrastructure this "works" in that the analysis stays alive for
  // the entire SCC pass run below.
  MPM.add(createGlobalsAAWrapperPass());

  // Start of CallGraph SCC passes.
  HandOffInliner();

  addFunctionSimplificationPasses(MPM);

  // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
  // pass manager that we are specifically trying to avoid. To prevent this
  // we must insert a no-op module pass to reset the pass manager.
  MPM.add(createBarrierNoopPass());

  // A fresh GlobalsModRef run is added at this point. This is particularly
  // useful as the above will have inlined, DCE'ed, and function-attr
  // propagated everything, so the call graph should be reasonably minimal and
  // richly annotated. Computing aliasing and mod/ref information for all local
  // globals here lets the late loop passes, and notably the vectorizer, use it
  // to help recognize vectorizable memory operations.
  //
  // Note that this relies on a bug in the pass manager which preserves a
  // module analysis into a function pass pipeline (and throughout it) so long
  // as the first function pass doesn't invalidate the module analysis. Thus
  // both Float2Int and LoopRotate have to preserve AliasAnalysis for this to
  // work. Fortunately, it is trivial to preserve AliasAnalysis (doing nothing
  // preserves it as it is required to be conservatively correct in the face of
  // IR changes).
  MPM.add(createGlobalsAAWrapperPass());

  MPM.add(createFloat2IntPass());
  MPM.add(createLowerConstantIntrinsicsPass());

  // Re-rotate loops in all our loop nests. They may have fallen out of rotated
  // form due to GVN or other transformations, and the vectorizer relies on the
  // rotated form. Header duplication is disabled at -Oz (SizeLevel == 2).
  const int RotateHeaderLimit = SizeLevel == 2 ? 0 : -1;
  MPM.add(createLoopRotatePass(RotateHeaderLimit, false));

  addVectorPasses(MPM, /* IsFullLTO */ false);

  // LoopSink sinks instructions hoisted by LICM, which serves as a
  // canonicalization pass that enables other optimizations. As a result,
  // LoopSink needs to be a very late IR pass to avoid undoing LICM's result
  // too early.
  MPM.add(createLoopSinkPass());
  // Get rid of LCSSA nodes.
  MPM.add(createInstSimplifyLegacyPass());

  // LoopSink (and other loop passes since the last simplifyCFG) might have
  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
  AddSwitchRangeSimplifyCFG();
}
|