[Inliner] Put inline history into IR as !inline_history metadata (#190700)

(Reland of #190092 with verifier change to look through GlobalAliases)

So that it's preserved across all inline invocations rather than just
one inliner pass run.

This prevents cases where devirtualization in the simplification
pipeline uncovers inlining opportunities that should be discarded due to
inline history, but we dropped the inline history between inliner pass
runs, causing code size to blow up, sometimes exponentially.

For compile time reasons, we want to limit this to only call sites that
have the potential to inline through SCCs, potentially with the help of
devirtualization. This means that the callee is in a non-trivial
(Ref)SCC, or the call site was previously an indirect call, which can
potentially be devirtualized to call any function.

The CGSCCUpdater::InlinedInternalEdges logic still seems to be relevant
even with this change, as monster_scc.ll blows up if I remove that code.


http://llvm-compile-time-tracker.com/compare.php?from=e830d88e8ae5f44a97cc76136a0a4e83aa9157c0&to=ed535e732fc41b79ab8efda2417886cbd0812f7f&stat=instructions:u

Fixes #186926.
This commit is contained in:
Arthur Eubanks 2026-04-06 17:31:43 -07:00 committed by GitHub
parent fa70ee45be
commit 82505fbfc8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 394 additions and 213 deletions

View File

@ -8852,6 +8852,25 @@ Example:
!0 = !{ptr @a}
!1 = !{ptr @b}
'``inline_history``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The ``inline_history`` metadata may be attached to a call instruction. It
indicates that the call instruction has been inlined from the referenced
functions. The call itself should not be inlined if it is a call to any of the
referenced functions since that could result in infinite inlining as we
continually inline through mutually recursive functions.
This is intended to be added by and used by inliner passes.
The metadata operands must all be function pointers or ``null``. ``null`` can
appear when the referenced function is erased from the module, e.g. an internal
function that has had all calls to it inlined.
.. code-block:: text
call void @foo(), !inline_history !0
!0 = !{ptr @bar, null, ptr @baz}
Module Flags Metadata
=====================

View File

@ -17,26 +17,26 @@ namespace llvm {
class CallBase;
template <typename Fn> class function_ref;
template <typename T> class InlineOrder {
class InlineOrder {
public:
virtual ~InlineOrder() = default;
virtual size_t size() = 0;
virtual void push(const T &Elt) = 0;
virtual void push(CallBase *Elt) = 0;
virtual T pop() = 0;
virtual CallBase *pop() = 0;
virtual void erase_if(function_ref<bool(T)> Pred) = 0;
virtual void erase_if(function_ref<bool(CallBase *)> Pred) = 0;
bool empty() { return !size(); }
};
LLVM_ABI std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
LLVM_ABI std::unique_ptr<InlineOrder>
getDefaultInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
ModuleAnalysisManager &MAM, Module &M);
LLVM_ABI std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
LLVM_ABI std::unique_ptr<InlineOrder>
getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
ModuleAnalysisManager &MAM, Module &M);
@ -54,10 +54,9 @@ class PluginInlineOrderAnalysis
public:
LLVM_ABI static AnalysisKey Key;
typedef std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> (
*InlineOrderFactory)(FunctionAnalysisManager &FAM,
const InlineParams &Params,
ModuleAnalysisManager &MAM, Module &M);
typedef std::unique_ptr<InlineOrder> (*InlineOrderFactory)(
FunctionAnalysisManager &FAM, const InlineParams &Params,
ModuleAnalysisManager &MAM, Module &M);
PluginInlineOrderAnalysis(InlineOrderFactory Factory) : Factory(Factory) {
assert(Factory != nullptr &&

View File

@ -60,3 +60,4 @@ LLVM_FIXED_MD_KIND(MD_alloc_token, "alloc_token", 45)
LLVM_FIXED_MD_KIND(MD_implicit_ref, "implicit.ref", 46)
LLVM_FIXED_MD_KIND(MD_nofpclass, "nofpclass", 47)
LLVM_FIXED_MD_KIND(MD_call_target, "call_target", 48)
LLVM_FIXED_MD_KIND(MD_inline_history, "inline_history", 49)

View File

@ -18,6 +18,7 @@
#define LLVM_TRANSFORMS_UTILS_CLONING_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
@ -88,6 +89,10 @@ struct ClonedCodeInfo {
/// check whether the main VMap mapping involves simplification or not.
DenseMap<const Value *, const Value *> OrigVMap;
// Cloned calls that were originally an indirect call. They may be direct or
// indirect after cloning.
SmallPtrSet<const Value *, 4> OriginallyIndirectCalls;
ClonedCodeInfo() = default;
bool isSimplified(const Value *From, const Value *To) const {
@ -225,11 +230,12 @@ LLVM_ABI void CloneFunctionBodyInto(
ValueMaterializer *Materializer = nullptr,
const MetadataPredicate *IdentityMD = nullptr);
LLVM_ABI void CloneAndPruneIntoFromInst(
Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst,
ValueToValueMapTy &VMap, bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
LLVM_ABI void
CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
ValueToValueMapTy &VMap, bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix, ClonedCodeInfo &CodeInfo);
/// This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
@ -242,10 +248,11 @@ LLVM_ABI void CloneAndPruneIntoFromInst(
/// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
/// mappings.
///
LLVM_ABI void CloneAndPruneFunctionInto(
Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap,
bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr);
LLVM_ABI void
CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap, bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix, ClonedCodeInfo &CodeInfo);
/// This class captures the data input to the InlineFunction call, and records
/// the auxiliary results produced by it.
@ -311,6 +318,7 @@ LLVM_ABI void InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
bool MergeAttributes = false,
AAResults *CalleeAAR = nullptr,
bool InsertLifetime = true,
bool TrackInlineHistory = false,
Function *ForwardVarArgsTo = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
@ -340,6 +348,7 @@ LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
bool MergeAttributes = false,
AAResults *CalleeAAR = nullptr,
bool InsertLifetime = true,
bool TrackInlineHistory = false,
Function *ForwardVarArgsTo = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
@ -352,6 +361,7 @@ LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
bool MergeAttributes = false,
AAResults *CalleeAAR = nullptr,
bool InsertLifetime = true,
bool TrackInlineHistory = false,
Function *ForwardVarArgsTo = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
@ -433,14 +443,6 @@ LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
Instruction *IStart, Instruction *IEnd,
LLVMContext &Context, StringRef Ext);
/// Check if Function F appears in the inline history chain.
/// InlineHistory is a vector of (Function, ParentHistoryID) pairs.
/// Returns true if F was already inlined in the chain leading to
/// InlineHistoryID.
LLVM_ABI bool
inlineHistoryIncludes(Function *F, int InlineHistoryID,
ArrayRef<std::pair<Function *, int>> InlineHistory);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H

View File

@ -199,10 +199,7 @@ private:
int Cost = INT_MAX;
};
template <typename PriorityT>
class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
using T = std::pair<CallBase *, int>;
template <typename PriorityT> class PriorityInlineOrder : public InlineOrder {
bool hasLowerPriority(const CallBase *L, const CallBase *R) const {
const auto I1 = Priorities.find(L);
const auto I2 = Priorities.find(R);
@ -243,31 +240,21 @@ public:
size_t size() override { return Heap.size(); }
void push(const T &Elt) override {
CallBase *CB = Elt.first;
const int InlineHistoryID = Elt.second;
void push(CallBase *CB) override {
Heap.push_back(CB);
Priorities[CB] = PriorityT(CB, FAM, Params);
std::push_heap(Heap.begin(), Heap.end(), isLess);
InlineHistoryMap[CB] = InlineHistoryID;
}
T pop() override {
CallBase *pop() override {
assert(size() > 0);
pop_heap_adjust();
CallBase *CB = Heap.pop_back_val();
T Result = std::make_pair(CB, InlineHistoryMap[CB]);
InlineHistoryMap.erase(CB);
return Result;
return Heap.pop_back_val();
}
void erase_if(function_ref<bool(T)> Pred) override {
auto PredWrapper = [=](CallBase *CB) -> bool {
return Pred(std::make_pair(CB, InlineHistoryMap[CB]));
};
llvm::erase_if(Heap, PredWrapper);
void erase_if(function_ref<bool(CallBase *)> Pred) override {
llvm::erase_if(Heap, Pred);
std::make_heap(Heap.begin(), Heap.end(), isLess);
}
@ -284,7 +271,7 @@ private:
AnalysisKey llvm::PluginInlineOrderAnalysis::Key;
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
std::unique_ptr<InlineOrder>
llvm::getDefaultInlineOrder(FunctionAnalysisManager &FAM,
const InlineParams &Params,
ModuleAnalysisManager &MAM, Module &M) {
@ -309,9 +296,10 @@ llvm::getDefaultInlineOrder(FunctionAnalysisManager &FAM,
return nullptr;
}
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
ModuleAnalysisManager &MAM, Module &M) {
std::unique_ptr<InlineOrder> llvm::getInlineOrder(FunctionAnalysisManager &FAM,
const InlineParams &Params,
ModuleAnalysisManager &MAM,
Module &M) {
if (MAM.isPassRegistered<PluginInlineOrderAnalysis>()) {
LLVM_DEBUG(dbgs() << " Current used priority: plugin ---- \n");
return MAM.getResult<PluginInlineOrderAnalysis>(M).Factory(FAM, Params, MAM,

View File

@ -549,6 +549,7 @@ private:
void visitAccessGroupMetadata(const MDNode *MD);
void visitCapturesMetadata(Instruction &I, const MDNode *Captures);
void visitAllocTokenMetadata(Instruction &I, MDNode *MD);
void visitInlineHistoryMetadata(Instruction &I, MDNode *MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@ -5619,6 +5620,20 @@ void Verifier::visitAllocTokenMetadata(Instruction &I, MDNode *MD) {
"expected integer constant", MD);
}
void Verifier::visitInlineHistoryMetadata(Instruction &I, MDNode *MD) {
Check(isa<CallBase>(I), "!inline_history should only exist on calls", &I);
for (Metadata *Op : MD->operands()) {
// Can be null when a function is erased.
if (!Op)
continue;
Check(isa<ValueAsMetadata>(Op) &&
isa<Function>(cast<ValueAsMetadata>(Op)
->getValue()
->stripPointerCastsAndAliases()),
"!inline_history operands must be functions or null", MD);
}
}
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@ -5851,6 +5866,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *MD = I.getMetadata(LLVMContext::MD_alloc_token))
visitAllocTokenMetadata(I, MD);
if (MDNode *MD = I.getMetadata(LLVMContext::MD_inline_history))
visitInlineHistoryMetadata(I, MD);
if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
CheckDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
visitMDNode(*N, AreDebugLocsAllowed::Yes);

View File

@ -53,8 +53,9 @@ bool AlwaysInlineImpl(
BasicBlock *Block = CB.getParent();
InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
InlineResult Res = InlineFunction(CB, IFI, /*MergeAttributes=*/true,
&GetAAR(Callee), InsertLifetime);
InlineResult Res = InlineFunction(
CB, IFI, /*MergeAttributes=*/true, &GetAAR(Callee), InsertLifetime,
/*TrackInlineHistory=*/NewCallSites != nullptr);
if (!Res.isSuccess()) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
@ -140,8 +141,7 @@ bool AlwaysInlineImpl(
continue;
// Detect recursion.
if (Callee == F ||
inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
if (Callee == F) {
ORE.emit([&]() {
return OptimizationRemarkMissed("inline", "NotInlined",
CB->getDebugLoc(), CB->getParent())

View File

@ -231,7 +231,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// this model, but it is uniformly spread across all the functions in the SCC
// and eventually they all become too large to inline, rather than
// incrementally maknig a single function grow in a super linear fashion.
SmallVector<std::pair<CallBase *, int>, 16> Calls;
SmallVector<CallBase *, 16> Calls;
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
@ -246,7 +246,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (auto *CB = dyn_cast<CallBase>(&I))
if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
Calls.push_back({CB, -1});
Calls.push_back(CB);
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
setInlineRemark(*CB, "unavailable definition");
@ -269,12 +269,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (Calls.empty())
return PreservedAnalyses::all();
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
// infinite inlining in some obscure cases. To represent this, we use an
// index into the InlineHistory vector.
SmallVector<std::pair<Function *, int>, 16> InlineHistory;
// Track a set vector of inlined callees so that we can augment the caller
// with all of their edges in the call graph before pruning out the ones that
// got simplified away.
@ -295,7 +289,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// have the same caller, so we first set up some shared infrastructure for
// this caller. We also do any pruning we can at this layer on the caller
// alone.
Function &F = *Calls[I].first->getCaller();
Function &F = *Calls[I]->getCaller();
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
@ -312,29 +306,17 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
bool DidInline = false;
for (; I < (int)Calls.size() && Calls[I].first->getCaller() == &F; ++I) {
auto &P = Calls[I];
CallBase *CB = P.first;
const int InlineHistoryID = P.second;
for (; I < (int)Calls.size() && Calls[I]->getCaller() == &F; ++I) {
CallBase *CB = Calls[I];
Function &Callee = *CB->getCalledFunction();
if (InlineHistoryID != -1 &&
inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
LLVM_DEBUG(dbgs() << "Skipping inlining due to history: " << F.getName()
<< " -> " << Callee.getName() << "\n");
setInlineRemark(*CB, "recursive");
// Set noinline so that we don't forget this decision across CGSCC
// iterations.
CB->setIsNoInline();
continue;
}
// Check if this inlining may repeat breaking an SCC apart that has
// already been split once before. In that case, inlining here may
// trigger infinite inlining, much like is prevented within the inliner
// itself by the InlineHistory above, but spread across CGSCC iterations
// and thus hidden from the full inline history.
LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
LazyCallGraph::Node &CalleeN = *CG.lookup(Callee);
LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(CalleeN);
if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
@ -367,9 +349,20 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
&FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
// For compile time reasons we try to only track inline history for the
// calls where it may actually prevent inlining, which is inlining through
// an SCC. This can happen if the callee is in a non-trivial SCC/RefSCC,
// or if an inlined call site was an indirect call, which can be
// devirtualized to call any target by replacing the indirectly called
// function with a function pointer referenced by the caller. The indirect
// call case is handled within InlineFunction.
bool TrackInlineHistory =
CalleeSCC->size() != 1 || CalleeSCC->getOuterRefSCC().size() != 1;
InlineResult IR = InlineFunction(
*CB, IFI, /*MergeAttributes=*/true,
&FAM.getResult<AAManager>(*CB->getCaller()), true, nullptr,
&FAM.getResult<AAManager>(*CB->getCaller()), /*InsertLifetime=*/true,
TrackInlineHistory, nullptr,
&FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB->getCaller()));
if (!IR.isSuccess()) {
Advice->recordUnsuccessfulInlining(IR);
@ -388,9 +381,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
int NewHistoryID = InlineHistory.size();
InlineHistory.push_back({&Callee, InlineHistoryID});
for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
Function *NewCallee = ICB->getCalledFunction();
assert(!(NewCallee && NewCallee->isIntrinsic()) &&
@ -405,7 +395,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
if (NewCallee) {
if (!NewCallee->isDeclaration()) {
Calls.push_back({ICB, NewHistoryID});
Calls.push_back(ICB);
// Continually inlining through an SCC can result in huge compile
// times and bloated code since we arbitrarily stop at some point
// when the inliner decides it's not profitable to inline anymore.
@ -437,12 +427,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() &&
!CG.isLibFunction(Callee)) {
if (Callee.hasLocalLinkage() || !Callee.hasComdat()) {
Calls.erase(
std::remove_if(Calls.begin() + I + 1, Calls.end(),
[&](const std::pair<CallBase *, int> &Call) {
return Call.first->getCaller() == &Callee;
}),
Calls.end());
Calls.erase(std::remove_if(Calls.begin() + I + 1, Calls.end(),
[&](const CallBase *CB) {
return CB->getCaller() == &Callee;
}),
Calls.end());
// Report inlining decision BEFORE deleting function contents, so we
// can still access e.g. the DebugLoc

View File

@ -144,7 +144,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
Calls->push({CB, -1});
Calls->push(CB);
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
setInlineRemark(*CB, "unavailable definition");
@ -166,26 +166,18 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
}
for (auto &[CB, Target] : ICPCandidates) {
if (auto *DirectCB = promoteCallWithIfThenElse(*CB, *Target, CtxProf))
Calls->push({DirectCB, -1});
Calls->push(DirectCB);
}
if (Calls->empty())
return PreservedAnalyses::all();
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
// infinite inlining in some obscure cases. To represent this, we use an
// index into the InlineHistory vector.
SmallVector<std::pair<Function *, int>, 16> InlineHistory;
// Track the dead functions to delete once finished with inlining calls. We
// defer deleting these to make it easier to handle the call graph updates.
SmallVector<Function *, 4> DeadFunctions;
// Loop forward over all of the calls.
while (!Calls->empty()) {
auto P = Calls->pop();
CallBase *CB = P.first;
const int InlineHistoryID = P.second;
CallBase *CB = Calls->pop();
Function &F = *CB->getCaller();
Function &Callee = *CB->getCalledFunction();
@ -198,12 +190,6 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
return FAM.getResult<AssumptionAnalysis>(F);
};
if (InlineHistoryID != -1 &&
inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
setInlineRemark(*CB, "recursive");
continue;
}
auto Advice = Advisor.getAdvice(*CB, /*OnlyMandatory*/ false);
// Check whether we want to inline this callsite.
if (!Advice->isInliningRecommended()) {
@ -234,9 +220,6 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
int NewHistoryID = InlineHistory.size();
InlineHistory.push_back({&Callee, InlineHistoryID});
for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
Function *NewCallee = ICB->getCalledFunction();
if (!NewCallee) {
@ -251,7 +234,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
}
if (NewCallee)
if (!NewCallee->isDeclaration())
Calls->push({ICB, NewHistoryID});
Calls->push(ICB);
}
}
@ -266,9 +249,8 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
Callee.removeDeadConstantUsers();
// if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) {
Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
return Call.first->getCaller() == &Callee;
});
Calls->erase_if(
[&](const CallBase *CB) { return CB->getCaller() == &Callee; });
// Report inlining decision BEFORE deleting function contents, so we
// can still access e.g. the DebugLoc

View File

@ -1372,7 +1372,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
// We can only forward varargs when we outlined a single region, else we
// bail on vararg functions.
if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr,
/*InsertLifetime=*/true, /*TrackInlineHistory=*/true,
(Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
: nullptr))
.isSuccess())

View File

@ -433,7 +433,7 @@ struct PruningFunctionCloner {
ValueToValueMapTy &VMap;
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
ClonedCodeInfo &CodeInfo;
bool HostFuncIsStrictFP;
Instruction *cloneInstruction(BasicBlock::const_iterator II);
@ -441,7 +441,7 @@ struct PruningFunctionCloner {
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap, bool moduleLevelChanges,
const char *nameSuffix, ClonedCodeInfo *codeInfo)
const char *nameSuffix, ClonedCodeInfo &codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
CodeInfo(codeInfo) {
@ -615,6 +615,9 @@ void PruningFunctionCloner::CloneBlock(
}
}
if (auto *CB = dyn_cast<CallBase>(II); CB && CB->isIndirectCall())
CodeInfo.OriginallyIndirectCalls.insert(NewInst);
if (II->hasName())
NewInst->setName(II->getName() + NameSuffix);
VMap[&*II] = NewInst; // Add instruction map to value.
@ -626,12 +629,10 @@ void PruningFunctionCloner::CloneBlock(
CloneDbgRecordsToHere(NewInst, II);
if (CodeInfo) {
CodeInfo->OrigVMap[&*II] = NewInst;
if (auto *CB = dyn_cast<CallBase>(&*II))
if (CB->hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
}
CodeInfo.OrigVMap[&*II] = NewInst;
if (auto *CB = dyn_cast<CallBase>(&*II))
if (CB->hasOperandBundles())
CodeInfo.OperandBundleCallSites.push_back(NewInst);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
@ -690,12 +691,10 @@ void PruningFunctionCloner::CloneBlock(
VMap[OldTI] = NewInst; // Add instruction map to value.
if (CodeInfo) {
CodeInfo->OrigVMap[OldTI] = NewInst;
if (auto *CB = dyn_cast<CallBase>(OldTI))
if (CB->hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
}
CodeInfo.OrigVMap[OldTI] = NewInst;
if (auto *CB = dyn_cast<CallBase>(OldTI))
if (CB->hasOperandBundles())
CodeInfo.OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
append_range(ToClone, successors(BB->getTerminator()));
@ -708,13 +707,11 @@ void PruningFunctionCloner::CloneBlock(
CloneDbgRecordsToHere(NewInst, OldTI->getIterator());
}
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsMemProfMetadata |= hasMemProfMetadata;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |=
hasStaticAllocas && BB != &BB->getParent()->front();
}
CodeInfo.ContainsCalls |= hasCalls;
CodeInfo.ContainsMemProfMetadata |= hasMemProfMetadata;
CodeInfo.ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo.ContainsDynamicAllocas |=
hasStaticAllocas && BB != &BB->getParent()->front();
}
/// This works like CloneAndPruneFunctionInto, except that it does not clone the
@ -726,7 +723,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo) {
ClonedCodeInfo &CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
ValueMapTypeRemapper *TypeMapper = nullptr;
@ -1006,10 +1003,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
/// constant arguments cause a significant amount of code in the callee to be
/// dead. Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(
Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap,
bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix,
ClonedCodeInfo &CodeInfo) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
}

View File

@ -2412,13 +2412,15 @@ remapIndices(Function &Caller, BasicBlock *StartBB,
// Updating the contextual profile after an inlining means, at a high level,
// copying over the data of the callee, **intentionally without any value
// scaling**, and copying over the callees of the inlined callee.
llvm::InlineResult llvm::InlineFunction(
CallBase &CB, InlineFunctionInfo &IFI, PGOContextualProfile &CtxProf,
bool MergeAttributes, AAResults *CalleeAAR, bool InsertLifetime,
Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) {
llvm::InlineResult
llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
PGOContextualProfile &CtxProf, bool MergeAttributes,
AAResults *CalleeAAR, bool InsertLifetime,
bool TrackInlineHistory, Function *ForwardVarArgsTo,
OptimizationRemarkEmitter *ORE) {
if (!CtxProf.isInSpecializedModule())
return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
ForwardVarArgsTo, ORE);
TrackInlineHistory, ForwardVarArgsTo, ORE);
auto &Caller = *CB.getCaller();
auto &Callee = *CB.getCalledFunction();
@ -2436,7 +2438,7 @@ llvm::InlineResult llvm::InlineFunction(
const auto NumCalleeCallsites = CtxProf.getNumCallsites(Callee);
auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
ForwardVarArgsTo, ORE);
TrackInlineHistory, ForwardVarArgsTo, ORE);
if (!Ret.isSuccess())
return Ret;
@ -2522,6 +2524,18 @@ llvm::InlineResult llvm::CanInlineCallSite(const CallBase &CB,
CalledFunc->isDeclaration()) // call!
return InlineResult::failure("external or indirect");
// Don't inline if we've already inlined this callee through this call site
// before to prevent infinite inlining through mutually recursive functions.
if (MDNode *InlineHistory = CB.getMetadata(LLVMContext::MD_inline_history)) {
for (const auto &Op : InlineHistory->operands()) {
if (auto *MD = dyn_cast_or_null<ValueAsMetadata>(Op)) {
if (MD->getValue() == CalledFunc) {
return InlineResult::failure("inline history");
}
}
}
}
// The inliner does not know how to inline through calls with operand bundles
// in general ...
if (CB.hasOperandBundles()) {
@ -2647,7 +2661,8 @@ llvm::InlineResult llvm::CanInlineCallSite(const CallBase &CB,
/// function by one level.
void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
bool MergeAttributes, AAResults *CalleeAAR,
bool InsertLifetime, Function *ForwardVarArgsTo,
bool InsertLifetime, bool TrackInlineHistory,
Function *ForwardVarArgsTo,
OptimizationRemarkEmitter *ORE) {
BasicBlock *OrigBB = CB.getParent();
Function *Caller = OrigBB->getParent();
@ -2768,7 +2783,7 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
&InlinedFunctionInfo);
InlinedFunctionInfo);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
@ -3269,6 +3284,35 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
IFI.InlinedCallSites.push_back(CB);
}
for (CallBase *ICB : IFI.InlinedCallSites) {
// We only track inline history if requested, or if the inlined call site
// was originally an indirect call (it may have become a direct call
// during inlining).
if (TrackInlineHistory ||
InlinedFunctionInfo.OriginallyIndirectCalls.contains(ICB)) {
// !inline_history is {Callee, CB.inline_history, ICB.inline_history}.
// Metadata nodes may be null if the referenced function was erased from
// the module.
SmallVector<Metadata *, 4> History;
History.push_back(ValueAsMetadata::get(CalledFunc));
if (MDNode *CBHistory = CB.getMetadata(LLVMContext::MD_inline_history)) {
for (const auto &Op : CBHistory->operands()) {
if (Op)
History.push_back(Op.get());
}
}
if (MDNode *CBHistory =
ICB->getMetadata(LLVMContext::MD_inline_history)) {
for (const auto &Op : CBHistory->operands()) {
if (Op)
History.push_back(Op.get());
}
}
MDNode *NewHistory = MDNode::get(Caller->getContext(), History);
ICB->setMetadata(LLVMContext::MD_inline_history, NewHistory);
}
}
// If we cloned in _exactly one_ basic block, and if that block ends in a
// return instruction, we splice the body of the inlined callee directly into
// the calling basic block.
@ -3474,30 +3518,15 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);
}
llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
bool MergeAttributes,
AAResults *CalleeAAR,
bool InsertLifetime,
Function *ForwardVarArgsTo,
OptimizationRemarkEmitter *ORE) {
llvm::InlineResult llvm::InlineFunction(
CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes,
AAResults *CalleeAAR, bool InsertLifetime, bool TrackInlineHistory,
Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) {
llvm::InlineResult Result = CanInlineCallSite(CB, IFI);
if (Result.isSuccess()) {
InlineFunctionImpl(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
ForwardVarArgsTo, ORE);
TrackInlineHistory, ForwardVarArgsTo, ORE);
}
return Result;
}
bool llvm::inlineHistoryIncludes(
Function *F, int InlineHistoryID,
ArrayRef<std::pair<Function *, int>> InlineHistory) {
while (InlineHistoryID != -1) {
assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
"Invalid inline history ID");
if (InlineHistory[InlineHistoryID].first == F)
return true;
InlineHistoryID = InlineHistory[InlineHistoryID].second;
}
return false;
}

View File

@ -3,7 +3,7 @@
; Test that the debug location is preserved when rewriting an inlined call as an invoke
; CHECK: invoke void @test()
; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:!.*]]
; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:![0-9]+]]
; CHECK: [[SP:.*]] = distinct !DISubprogram(
; CHECK: [[INL_LOC]] = !DILocation(line: 1, scope: [[SP]], inlinedAt: [[INL_AT:.*]])
; CHECK: [[INL_AT]] = distinct !DILocation(line: 2, scope: [[SP]])

View File

@ -0,0 +1,25 @@
; RUN: opt -passes='cgscc(inline,function(instcombine))' -S < %s | FileCheck %s
; Check that devirtualization of an indirect call to itself doesn't cause
; infinite inlining. Note that we need the devirtualization to happen in
; simplification between inlining runs or else InlineCost will substitute %p
; with @p and see that the call is a recursive call and bail.
define void @p(ptr %p, i64 %x) {
%a = alloca ptr
store ptr %p, ptr %a
%g = getelementptr i8, ptr %a, i64 %x
%b = load ptr, ptr %g
call void %b(ptr %p, i64 %x)
ret void
}
define void @q() {
; CHECK-LABEL: define void @q() {
; CHECK-NEXT: call void @p({{.*}}), !inline_history [[HISTORY:![0-9]+]]
; CHECK-NEXT: ret void
call void @p(ptr @p, i64 0)
ret void
}
; CHECK: [[HISTORY]] = !{ptr @p}

View File

@ -1,32 +0,0 @@
; RUN: opt -passes=inline -S < %s | FileCheck %s
; This will inline @f1 into @a, causing two new calls to @f2, which will get inlined for two calls to @f1.
; The inline history should stop recursive inlining here, and make sure to mark the inlined calls as noinline so we don't repeat the inlining later on when @a gets inlined into @b.
define internal void @f1(ptr %p) {
call void %p(ptr @f1)
ret void
}
define internal void @f2(ptr %p) {
call void %p(ptr @f2)
call void %p(ptr @f2)
ret void
}
define void @b() {
; CHECK-LABEL: define void @b() {
; CHECK-NEXT: call void @f1(ptr @f2) #[[NOINLINE:[0-9]+]]
; CHECK-NEXT: call void @f1(ptr @f2) #[[NOINLINE]]
; CHECK-NEXT: ret void
;
call void @a()
ret void
}
define internal void @a() {
call void @f1(ptr @f2)
ret void
}
; CHECK: [[NOINLINE]] = { noinline }

View File

@ -0,0 +1,102 @@
; RUN: opt -passes=inline -S < %s | FileCheck %s
declare ptr @get()
declare ptr @foo1()
declare ptr @foo2()
; Don't need to track inline history for non-mutually recursive calls.
define internal void @x() noinline {
ret void
}
define internal void @y() {
call void @x()
ret void
}
define void @z() {
; CHECK-LABEL: define void @z() {
; CHECK-NEXT: call void @x(){{$}}
; CHECK-NEXT: ret void
call void @y()
ret void
}
; Indirect calls may be devirtualized, they need inline history tracking. In
; this case, @s is the history, but it gets deleted so the inline history
; metadata becomes null. This is fine since we cannot infinitely inline through
; @s anymore as it doesn't exist.
define internal void @s() {
%p = call ptr @get()
call void %p()
ret void
}
define void @t() {
; CHECK-LABEL: define void @t() {
; CHECK-NEXT: %p.i = call ptr @get()
; CHECK-NEXT: call void %p.i(), !inline_history [[HISTORY_T:![0-9]+]]
; CHECK-NEXT: ret void
call void @s()
ret void
}
; This will inline @f1 into @a, causing two new calls to @f2, which will get inlined for two calls to @f1.
; The inline history should stop recursive inlining here.
define internal void @f1(ptr %p) {
call void %p(ptr @f1)
ret void
}
define internal void @f2(ptr %p) {
call void %p(ptr @f2)
call void %p(ptr @f2)
ret void
}
define void @b() {
; CHECK-LABEL: define void @b() {
; CHECK-NEXT: call void @f1(ptr @f2), !inline_history [[HISTORY_B:![0-9]+]]
; CHECK-NEXT: call void @f1(ptr @f2), !inline_history [[HISTORY_B:![0-9]+]]
; CHECK-NEXT: ret void
;
call void @a()
ret void
}
define internal void @a() {
call void @f1(ptr @f2)
ret void
}
; Check that the inline history is
; {callee, processed call's inline_history, just-inlined call's inline_history}
define void @m(ptr %p) noinline {
ret void
}
define void @n() {
call void @m(ptr @n2), !inline_history !{ptr @foo1}
ret void
}
define void @n2() {
call void @m(ptr @n)
ret void
}
define void @o() {
; CHECK-LABEL: define void @o() {
; CHECK-NEXT: call void @m(ptr @n2), !inline_history [[HISTORY_O:![0-9]+]]
; CHECK-NEXT: ret void
call void @n(), !inline_history !{ptr @foo2}
ret void
}
; CHECK-DAG: [[HISTORY_T]] = distinct !{null}
; CHECK-DAG: [[HISTORY_B]] = !{ptr @f2, ptr @f1}
; CHECK-DAG: [[HISTORY_O]] = !{ptr @n, ptr @foo2, ptr @foo1}

View File

@ -14,7 +14,7 @@
; CHECK: Size after inlining: 17
; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i)
; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test.i = tail call float @test(float %x, float %call.i)
; CHECK: Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor
; CHECK: NOT Inlining (cost=never): recursive, Call: %call.i = tail call float @inline_rec_true_successor(float %x, float %scale)
; CHECK: Updated inlining SCC: (test, inline_rec_true_successor)
; CHECK: Inlining calls in: test

View File

@ -0,0 +1,61 @@
; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
@x = global i32 0
@alias = alias void (), ptr @f
define void @f() {
ret void
}
; CHECK: !inline_history should only exist on calls
define void @wrong_instr(ptr %x) {
load ptr, ptr %x, !inline_history !{ptr @wrong_instr}
ret void
}
; CHECK: !inline_history operands must be functions or null
define void @global_value_operand() {
call void @f(), !inline_history !{ptr @x}
ret void
}
; CHECK: !inline_history operands must be functions or null
define void @metadata_operand() {
call void @f(), !inline_history !{!0}
ret void
}
; CHECK: !inline_history operands must be functions or null
define void @nullptr_operand() {
call void @f(), !inline_history !{ptr null}
ret void
}
; CHECK-NOT: !inline_history operands must be functions or null
define void @empty_metadata() {
call void @f(), !inline_history !{}
ret void
}
define void @null_metadata() {
call void @f(), !inline_history !{null}
ret void
}
define void @function_metadata() {
call void @f(), !inline_history !{ptr @f}
ret void
}
define void @alias_metadata() {
call void @f(), !inline_history !{ptr @alias}
ret void
}
define void @mixed_metadata() {
call void @f(), !inline_history !{null, ptr @f, null, ptr @mixed_metadata}
ret void
}
!0 = !{}

View File

@ -12,31 +12,29 @@ using namespace llvm;
namespace {
class NoFooInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
class NoFooInlineOrder : public InlineOrder {
public:
NoFooInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
ModuleAnalysisManager &MAM, Module &M) {
DefaultInlineOrder = getDefaultInlineOrder(FAM, Params, MAM, M);
}
size_t size() override { return DefaultInlineOrder->size(); }
void push(const std::pair<CallBase *, int> &Elt) override {
// We ignore calles named "foo"
if (Elt.first->getCalledFunction()->getName() == "foo") {
void push(CallBase *Elt) override {
// We ignore callees named "foo"
if (Elt->getCalledFunction()->getName() == "foo") {
DefaultInlineOrder->push(Elt);
}
}
std::pair<CallBase *, int> pop() override {
return DefaultInlineOrder->pop();
}
void erase_if(function_ref<bool(std::pair<CallBase *, int>)> Pred) override {
CallBase *pop() override { return DefaultInlineOrder->pop(); }
void erase_if(function_ref<bool(CallBase *)> Pred) override {
DefaultInlineOrder->erase_if(Pred);
}
private:
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> DefaultInlineOrder;
std::unique_ptr<InlineOrder> DefaultInlineOrder;
};
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
std::unique_ptr<InlineOrder>
NoFooInlineOrderFactory(FunctionAnalysisManager &FAM,
const InlineParams &Params, ModuleAnalysisManager &MAM,
Module &M) {