diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 5584e0828d3c..f66cd37d9ec7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -8852,6 +8852,25 @@ Example: !0 = !{ptr @a} !1 = !{ptr @b} +'``inline_history``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The ``inline_history`` metadata may be attached to a call instruction. It +indicates that the call instruction has been inlined from the referenced +functions. The call itself should not be inlined if it is a call to any of the +referenced functions since that could result in infinite inlining as we +continually inline through mutually recursive functions. + +This is intended to be added by and used by inliner passes. + +The metadata operands must all be function pointers or ``null``. ``null`` can +appear when the referenced function is erased from the module, e.g. an internal +function that has had all calls to it inlined. + +.. code-block:: text + + call void @foo(), !inline_history !0 + + !0 = !{ptr @bar, null, ptr @baz} Module Flags Metadata ===================== diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h index bc96d546fda7..459afcde4715 100644 --- a/llvm/include/llvm/Analysis/InlineOrder.h +++ b/llvm/include/llvm/Analysis/InlineOrder.h @@ -17,26 +17,26 @@ namespace llvm { class CallBase; template class function_ref; -template class InlineOrder { +class InlineOrder { public: virtual ~InlineOrder() = default; virtual size_t size() = 0; - virtual void push(const T &Elt) = 0; + virtual void push(CallBase *Elt) = 0; - virtual T pop() = 0; + virtual CallBase *pop() = 0; - virtual void erase_if(function_ref Pred) = 0; + virtual void erase_if(function_ref Pred) = 0; bool empty() { return !size(); } }; -LLVM_ABI std::unique_ptr>> +LLVM_ABI std::unique_ptr getDefaultInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params, ModuleAnalysisManager &MAM, Module &M); -LLVM_ABI std::unique_ptr>> +LLVM_ABI std::unique_ptr getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params, ModuleAnalysisManager &MAM, Module &M); @@ -54,10 +54,9 @@ class PluginInlineOrderAnalysis public: LLVM_ABI static AnalysisKey Key; - typedef std::unique_ptr>> ( - *InlineOrderFactory)(FunctionAnalysisManager &FAM, - const InlineParams &Params, - ModuleAnalysisManager &MAM, Module &M); + typedef std::unique_ptr (*InlineOrderFactory)( + FunctionAnalysisManager &FAM, const InlineParams &Params, + ModuleAnalysisManager &MAM, Module &M); PluginInlineOrderAnalysis(InlineOrderFactory Factory) : Factory(Factory) { assert(Factory != nullptr && diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 0d79677d7079..6d74aff4ae25 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -60,3 +60,4 @@ LLVM_FIXED_MD_KIND(MD_alloc_token, "alloc_token", 45) LLVM_FIXED_MD_KIND(MD_implicit_ref, "implicit.ref", 46) LLVM_FIXED_MD_KIND(MD_nofpclass, "nofpclass", 47) LLVM_FIXED_MD_KIND(MD_call_target, "call_target", 48) +LLVM_FIXED_MD_KIND(MD_inline_history, "inline_history", 49) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index ff4b390eb3e6..ced4cfcd6d23 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -18,6 +18,7 @@ #define LLVM_TRANSFORMS_UTILS_CLONING_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" @@ -88,6 +89,10 @@ struct ClonedCodeInfo { /// check whether the main VMap mapping involves simplification or not. DenseMap OrigVMap; + // Cloned calls that were originally an indirect call. They may be direct or + // indirect after cloning. + SmallPtrSet OriginallyIndirectCalls; + ClonedCodeInfo() = default; bool isSimplified(const Value *From, const Value *To) const { @@ -225,11 +230,12 @@ LLVM_ABI void CloneFunctionBodyInto( ValueMaterializer *Materializer = nullptr, const MetadataPredicate *IdentityMD = nullptr); -LLVM_ABI void CloneAndPruneIntoFromInst( - Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, - ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl &Returns, const char *NameSuffix = "", - ClonedCodeInfo *CodeInfo = nullptr); +LLVM_ABI void +CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, + const Instruction *StartingInst, + ValueToValueMapTy &VMap, bool ModuleLevelChanges, + SmallVectorImpl &Returns, + const char *NameSuffix, ClonedCodeInfo &CodeInfo); /// This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The @@ -242,10 +248,11 @@ LLVM_ABI void CloneAndPruneIntoFromInst( /// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue /// mappings. /// -LLVM_ABI void CloneAndPruneFunctionInto( - Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, - bool ModuleLevelChanges, SmallVectorImpl &Returns, - const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr); +LLVM_ABI void +CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, bool ModuleLevelChanges, + SmallVectorImpl &Returns, + const char *NameSuffix, ClonedCodeInfo &CodeInfo); /// This class captures the data input to the InlineFunction call, and records /// the auxiliary results produced by it. @@ -311,6 +318,7 @@ LLVM_ABI void InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes = false, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true, + bool TrackInlineHistory = false, Function *ForwardVarArgsTo = nullptr, OptimizationRemarkEmitter *ORE = nullptr); @@ -340,6 +348,7 @@ LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes = false, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true, + bool TrackInlineHistory = false, Function *ForwardVarArgsTo = nullptr, OptimizationRemarkEmitter *ORE = nullptr); @@ -352,6 +361,7 @@ LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes = false, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true, + bool TrackInlineHistory = false, Function *ForwardVarArgsTo = nullptr, OptimizationRemarkEmitter *ORE = nullptr); @@ -433,14 +443,6 @@ LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef NoAliasDeclScopes, LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef NoAliasDeclScopes, Instruction *IStart, Instruction *IEnd, LLVMContext &Context, StringRef Ext); -/// Check if Function F appears in the inline history chain. -/// InlineHistory is a vector of (Function, ParentHistoryID) pairs. -/// Returns true if F was already inlined in the chain leading to -/// InlineHistoryID. -LLVM_ABI bool -inlineHistoryIncludes(Function *F, int InlineHistoryID, - ArrayRef> InlineHistory); - } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CLONING_H diff --git a/llvm/lib/Analysis/InlineOrder.cpp b/llvm/lib/Analysis/InlineOrder.cpp index 8d920153f250..f54e040ff83c 100644 --- a/llvm/lib/Analysis/InlineOrder.cpp +++ b/llvm/lib/Analysis/InlineOrder.cpp @@ -199,10 +199,7 @@ private: int Cost = INT_MAX; }; -template -class PriorityInlineOrder : public InlineOrder> { - using T = std::pair; - +template class PriorityInlineOrder : public InlineOrder { bool hasLowerPriority(const CallBase *L, const CallBase *R) const { const auto I1 = Priorities.find(L); const auto I2 = Priorities.find(R); @@ -243,31 +240,21 @@ public: size_t size() override { return Heap.size(); } - void push(const T &Elt) override { - CallBase *CB = Elt.first; - const int InlineHistoryID = Elt.second; - + void push(CallBase *CB) override { Heap.push_back(CB); Priorities[CB] = PriorityT(CB, FAM, Params); std::push_heap(Heap.begin(), Heap.end(), isLess); - InlineHistoryMap[CB] = InlineHistoryID; } - T pop() override { + CallBase *pop() override { assert(size() > 0); pop_heap_adjust(); - CallBase *CB = Heap.pop_back_val(); - T Result = std::make_pair(CB, InlineHistoryMap[CB]); - InlineHistoryMap.erase(CB); - return Result; + return Heap.pop_back_val(); } - void erase_if(function_ref Pred) override { - auto PredWrapper = [=](CallBase *CB) -> bool { - return Pred(std::make_pair(CB, InlineHistoryMap[CB])); - }; - llvm::erase_if(Heap, PredWrapper); + void erase_if(function_ref Pred) override { + llvm::erase_if(Heap, Pred); std::make_heap(Heap.begin(), Heap.end(), isLess); } @@ -284,7 +271,7 @@ private: AnalysisKey llvm::PluginInlineOrderAnalysis::Key; -std::unique_ptr>> +std::unique_ptr llvm::getDefaultInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params, ModuleAnalysisManager &MAM, Module &M) { @@ -309,9 +296,10 @@ llvm::getDefaultInlineOrder(FunctionAnalysisManager &FAM, return nullptr; } -std::unique_ptr>> -llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params, - ModuleAnalysisManager &MAM, Module &M) { +std::unique_ptr llvm::getInlineOrder(FunctionAnalysisManager &FAM, + const InlineParams &Params, + ModuleAnalysisManager &MAM, + Module &M) { if (MAM.isPassRegistered()) { LLVM_DEBUG(dbgs() << " Current used priority: plugin ---- \n"); return MAM.getResult(M).Factory(FAM, Params, MAM, diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index cf9131c66d6c..7d5dc91a1504 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -549,6 +549,7 @@ private: void visitAccessGroupMetadata(const MDNode *MD); void visitCapturesMetadata(Instruction &I, const MDNode *Captures); void visitAllocTokenMetadata(Instruction &I, MDNode *MD); + void visitInlineHistoryMetadata(Instruction &I, MDNode *MD); template bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); @@ -5619,6 +5620,18 @@ void Verifier::visitAllocTokenMetadata(Instruction &I, MDNode *MD) { "expected integer constant", MD); } +void Verifier::visitInlineHistoryMetadata(Instruction &I, MDNode *MD) { + Check(isa(I), "!inline_history should only exist on calls", &I); + for (Metadata *Op : MD->operands()) { + // Can be null when a function is erased. + if (!Op) + continue; + Check(isa(Op) && + isa(cast(Op)->getValue()), + "!inline_history operands must be functions or null", MD); + } +} + /// verifyInstruction - Verify that an instruction is well formed. /// void Verifier::visitInstruction(Instruction &I) { @@ -5851,6 +5864,9 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_alloc_token)) visitAllocTokenMetadata(I, MD); + if (MDNode *MD = I.getMetadata(LLVMContext::MD_inline_history)) + visitInlineHistoryMetadata(I, MD); + if (MDNode *N = I.getDebugLoc().getAsMDNode()) { CheckDI(isa(N), "invalid !dbg metadata attachment", &I, N); visitMDNode(*N, AreDebugLocsAllowed::Yes); diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index d0e309427d32..080cb8ddb33f 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -53,8 +53,9 @@ bool AlwaysInlineImpl( BasicBlock *Block = CB.getParent(); InlineFunctionInfo IFI(GetAssumptionCache, &PSI); - InlineResult Res = InlineFunction(CB, IFI, /*MergeAttributes=*/true, - &GetAAR(Callee), InsertLifetime); + InlineResult Res = InlineFunction( + CB, IFI, /*MergeAttributes=*/true, &GetAAR(Callee), InsertLifetime, + /*TrackInlineHistory=*/NewCallSites != nullptr); if (!Res.isSuccess()) { ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) @@ -140,8 +141,7 @@ bool AlwaysInlineImpl( continue; // Detect recursion. - if (Callee == F || - inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) { + if (Callee == F) { ORE.emit([&]() { return OptimizationRemarkMissed("inline", "NotInlined", CB->getDebugLoc(), CB->getParent()) diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index d795fbbbe412..2c56df9a3e24 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -231,7 +231,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // this model, but it is uniformly spread across all the functions in the SCC // and eventually they all become too large to inline, rather than // incrementally maknig a single function grow in a super linear fashion. - SmallVector, 16> Calls; + SmallVector Calls; // Populate the initial list of calls in this SCC. for (auto &N : InitialC) { @@ -246,7 +246,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (auto *CB = dyn_cast(&I)) if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) - Calls.push_back({CB, -1}); + Calls.push_back(CB); else if (!isa(I)) { using namespace ore; setInlineRemark(*CB, "unavailable definition"); @@ -269,12 +269,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (Calls.empty()) return PreservedAnalyses::all(); - // When inlining a callee produces new call sites, we want to keep track of - // the fact that they were inlined from the callee. This allows us to avoid - // infinite inlining in some obscure cases. To represent this, we use an - // index into the InlineHistory vector. - SmallVector, 16> InlineHistory; - // Track a set vector of inlined callees so that we can augment the caller // with all of their edges in the call graph before pruning out the ones that // got simplified away. @@ -295,7 +289,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // have the same caller, so we first set up some shared infrastructure for // this caller. We also do any pruning we can at this layer on the caller // alone. - Function &F = *Calls[I].first->getCaller(); + Function &F = *Calls[I]->getCaller(); LazyCallGraph::Node &N = *CG.lookup(F); if (CG.lookupSCC(N) != C) continue; @@ -312,29 +306,17 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. bool DidInline = false; - for (; I < (int)Calls.size() && Calls[I].first->getCaller() == &F; ++I) { - auto &P = Calls[I]; - CallBase *CB = P.first; - const int InlineHistoryID = P.second; + for (; I < (int)Calls.size() && Calls[I]->getCaller() == &F; ++I) { + CallBase *CB = Calls[I]; Function &Callee = *CB->getCalledFunction(); - if (InlineHistoryID != -1 && - inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { - LLVM_DEBUG(dbgs() << "Skipping inlining due to history: " << F.getName() - << " -> " << Callee.getName() << "\n"); - setInlineRemark(*CB, "recursive"); - // Set noinline so that we don't forget this decision across CGSCC - // iterations. - CB->setIsNoInline(); - continue; - } - // Check if this inlining may repeat breaking an SCC apart that has // already been split once before. In that case, inlining here may // trigger infinite inlining, much like is prevented within the inliner // itself by the InlineHistory above, but spread across CGSCC iterations // and thus hidden from the full inline history. - LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee)); + LazyCallGraph::Node &CalleeN = *CG.lookup(Callee); + LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(CalleeN); if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) { LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " "previously split out of this SCC by inlining: " @@ -367,9 +349,20 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, &FAM.getResult(*(CB->getCaller())), &FAM.getResult(Callee)); + // For compile time reasons we try to only track inline history for the + // calls where it may actually prevent inlining, which is inlining through + // an SCC. This can happen if the callee is in a non-trivial SCC/RefSCC, + // or if an inlined call site was an indirect call, which can be + // devirtualized to call any target by replacing the indirectly called + // function with a function pointer referenced by the caller. The indirect + // call case is handled within InlineFunction. + bool TrackInlineHistory = + CalleeSCC->size() != 1 || CalleeSCC->getOuterRefSCC().size() != 1; + InlineResult IR = InlineFunction( *CB, IFI, /*MergeAttributes=*/true, - &FAM.getResult(*CB->getCaller()), true, nullptr, + &FAM.getResult(*CB->getCaller()), /*InsertLifetime=*/true, + TrackInlineHistory, nullptr, &FAM.getResult(*CB->getCaller())); if (!IR.isSuccess()) { Advice->recordUnsuccessfulInlining(IR); @@ -388,9 +381,6 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // Add any new callsites to defined functions to the worklist. if (!IFI.InlinedCallSites.empty()) { - int NewHistoryID = InlineHistory.size(); - InlineHistory.push_back({&Callee, InlineHistoryID}); - for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { Function *NewCallee = ICB->getCalledFunction(); assert(!(NewCallee && NewCallee->isIntrinsic()) && @@ -405,7 +395,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, } if (NewCallee) { if (!NewCallee->isDeclaration()) { - Calls.push_back({ICB, NewHistoryID}); + Calls.push_back(ICB); // Continually inlining through an SCC can result in huge compile // times and bloated code since we arbitrarily stop at some point // when the inliner decides it's not profitable to inline anymore. @@ -437,12 +427,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() && !CG.isLibFunction(Callee)) { if (Callee.hasLocalLinkage() || !Callee.hasComdat()) { - Calls.erase( - std::remove_if(Calls.begin() + I + 1, Calls.end(), - [&](const std::pair &Call) { - return Call.first->getCaller() == &Callee; - }), - Calls.end()); + Calls.erase(std::remove_if(Calls.begin() + I + 1, Calls.end(), + [&](const CallBase *CB) { + return CB->getCaller() == &Callee; + }), + Calls.end()); // Report inlining decision BEFORE deleting function contents, so we // can still access e.g. the DebugLoc diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index 31c26c9fb8c0..79f545a9b328 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -144,7 +144,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, if (auto *CB = dyn_cast(&I)) { if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) - Calls->push({CB, -1}); + Calls->push(CB); else if (!isa(I)) { using namespace ore; setInlineRemark(*CB, "unavailable definition"); @@ -166,26 +166,18 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, } for (auto &[CB, Target] : ICPCandidates) { if (auto *DirectCB = promoteCallWithIfThenElse(*CB, *Target, CtxProf)) - Calls->push({DirectCB, -1}); + Calls->push(DirectCB); } if (Calls->empty()) return PreservedAnalyses::all(); - // When inlining a callee produces new call sites, we want to keep track of - // the fact that they were inlined from the callee. This allows us to avoid - // infinite inlining in some obscure cases. To represent this, we use an - // index into the InlineHistory vector. - SmallVector, 16> InlineHistory; - // Track the dead functions to delete once finished with inlining calls. We // defer deleting these to make it easier to handle the call graph updates. SmallVector DeadFunctions; // Loop forward over all of the calls. while (!Calls->empty()) { - auto P = Calls->pop(); - CallBase *CB = P.first; - const int InlineHistoryID = P.second; + CallBase *CB = Calls->pop(); Function &F = *CB->getCaller(); Function &Callee = *CB->getCalledFunction(); @@ -198,12 +190,6 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, return FAM.getResult(F); }; - if (InlineHistoryID != -1 && - inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { - setInlineRemark(*CB, "recursive"); - continue; - } - auto Advice = Advisor.getAdvice(*CB, /*OnlyMandatory*/ false); // Check whether we want to inline this callsite. if (!Advice->isInliningRecommended()) { @@ -234,9 +220,6 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, // Add any new callsites to defined functions to the worklist. if (!IFI.InlinedCallSites.empty()) { - int NewHistoryID = InlineHistory.size(); - InlineHistory.push_back({&Callee, InlineHistoryID}); - for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { Function *NewCallee = ICB->getCalledFunction(); if (!NewCallee) { @@ -251,7 +234,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, } if (NewCallee) if (!NewCallee->isDeclaration()) - Calls->push({ICB, NewHistoryID}); + Calls->push(ICB); } } @@ -266,9 +249,8 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, Callee.removeDeadConstantUsers(); // if (Callee.use_empty() && !CG.isLibFunction(Callee)) { if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) { - Calls->erase_if([&](const std::pair &Call) { - return Call.first->getCaller() == &Callee; - }); + Calls->erase_if( + [&](const CallBase *CB) { return CB->getCaller() == &Callee; }); // Report inlining decision BEFORE deleting function contents, so we // can still access e.g. the DebugLoc diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index c00402f8ef10..0b5c5d17abf8 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1372,7 +1372,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { InlineFunctionInfo IFI(GetAssumptionCache, &PSI); // We can only forward varargs when we outlined a single region, else we // bail on vararg functions. - if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true, + if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, + /*InsertLifetime=*/true, /*TrackInlineHistory=*/true, (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first : nullptr)) .isSuccess()) diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index ac78e8251df9..8bf941cf19cd 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -433,7 +433,7 @@ struct PruningFunctionCloner { ValueToValueMapTy &VMap; bool ModuleLevelChanges; const char *NameSuffix; - ClonedCodeInfo *CodeInfo; + ClonedCodeInfo &CodeInfo; bool HostFuncIsStrictFP; Instruction *cloneInstruction(BasicBlock::const_iterator II); @@ -441,7 +441,7 @@ struct PruningFunctionCloner { public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, - const char *nameSuffix, ClonedCodeInfo *codeInfo) + const char *nameSuffix, ClonedCodeInfo &codeInfo) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), CodeInfo(codeInfo) { @@ -615,6 +615,9 @@ void PruningFunctionCloner::CloneBlock( } } + if (auto *CB = dyn_cast(II); CB && CB->isIndirectCall()) + CodeInfo.OriginallyIndirectCalls.insert(NewInst); + if (II->hasName()) NewInst->setName(II->getName() + NameSuffix); VMap[&*II] = NewInst; // Add instruction map to value. @@ -626,12 +629,10 @@ void PruningFunctionCloner::CloneBlock( CloneDbgRecordsToHere(NewInst, II); - if (CodeInfo) { - CodeInfo->OrigVMap[&*II] = NewInst; - if (auto *CB = dyn_cast(&*II)) - if (CB->hasOperandBundles()) - CodeInfo->OperandBundleCallSites.push_back(NewInst); - } + CodeInfo.OrigVMap[&*II] = NewInst; + if (auto *CB = dyn_cast(&*II)) + if (CB->hasOperandBundles()) + CodeInfo.OperandBundleCallSites.push_back(NewInst); if (const AllocaInst *AI = dyn_cast(II)) { if (isa(AI->getArraySize())) @@ -690,12 +691,10 @@ void PruningFunctionCloner::CloneBlock( VMap[OldTI] = NewInst; // Add instruction map to value. - if (CodeInfo) { - CodeInfo->OrigVMap[OldTI] = NewInst; - if (auto *CB = dyn_cast(OldTI)) - if (CB->hasOperandBundles()) - CodeInfo->OperandBundleCallSites.push_back(NewInst); - } + CodeInfo.OrigVMap[OldTI] = NewInst; + if (auto *CB = dyn_cast(OldTI)) + if (CB->hasOperandBundles()) + CodeInfo.OperandBundleCallSites.push_back(NewInst); // Recursively clone any reachable successor blocks. append_range(ToClone, successors(BB->getTerminator())); @@ -708,13 +707,11 @@ void PruningFunctionCloner::CloneBlock( CloneDbgRecordsToHere(NewInst, OldTI->getIterator()); } - if (CodeInfo) { - CodeInfo->ContainsCalls |= hasCalls; - CodeInfo->ContainsMemProfMetadata |= hasMemProfMetadata; - CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; - CodeInfo->ContainsDynamicAllocas |= - hasStaticAllocas && BB != &BB->getParent()->front(); - } + CodeInfo.ContainsCalls |= hasCalls; + CodeInfo.ContainsMemProfMetadata |= hasMemProfMetadata; + CodeInfo.ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo.ContainsDynamicAllocas |= + hasStaticAllocas && BB != &BB->getParent()->front(); } /// This works like CloneAndPruneFunctionInto, except that it does not clone the @@ -726,7 +723,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, bool ModuleLevelChanges, SmallVectorImpl &Returns, const char *NameSuffix, - ClonedCodeInfo *CodeInfo) { + ClonedCodeInfo &CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; @@ -1006,10 +1003,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. -void llvm::CloneAndPruneFunctionInto( - Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, - bool ModuleLevelChanges, SmallVectorImpl &Returns, - const char *NameSuffix, ClonedCodeInfo *CodeInfo) { +void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl &Returns, + const char *NameSuffix, + ClonedCodeInfo &CodeInfo) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, ModuleLevelChanges, Returns, NameSuffix, CodeInfo); } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 1c4df54b60f4..7eef188bbc57 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2412,13 +2412,15 @@ remapIndices(Function &Caller, BasicBlock *StartBB, // Updating the contextual profile after an inlining means, at a high level, // copying over the data of the callee, **intentionally without any value // scaling**, and copying over the callees of the inlined callee. -llvm::InlineResult llvm::InlineFunction( - CallBase &CB, InlineFunctionInfo &IFI, PGOContextualProfile &CtxProf, - bool MergeAttributes, AAResults *CalleeAAR, bool InsertLifetime, - Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) { +llvm::InlineResult +llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, + PGOContextualProfile &CtxProf, bool MergeAttributes, + AAResults *CalleeAAR, bool InsertLifetime, + bool TrackInlineHistory, Function *ForwardVarArgsTo, + OptimizationRemarkEmitter *ORE) { if (!CtxProf.isInSpecializedModule()) return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, - ForwardVarArgsTo, ORE); + TrackInlineHistory, ForwardVarArgsTo, ORE); auto &Caller = *CB.getCaller(); auto &Callee = *CB.getCalledFunction(); @@ -2436,7 +2438,7 @@ llvm::InlineResult llvm::InlineFunction( const auto NumCalleeCallsites = CtxProf.getNumCallsites(Callee); auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, - ForwardVarArgsTo, ORE); + TrackInlineHistory, ForwardVarArgsTo, ORE); if (!Ret.isSuccess()) return Ret; @@ -2522,6 +2524,18 @@ llvm::InlineResult llvm::CanInlineCallSite(const CallBase &CB, CalledFunc->isDeclaration()) // call! return InlineResult::failure("external or indirect"); + // Don't inline if we've already inlined this callee through this call site + // before to prevent infinite inlining through mutually recursive functions. + if (MDNode *InlineHistory = CB.getMetadata(LLVMContext::MD_inline_history)) { + for (const auto &Op : InlineHistory->operands()) { + if (auto *MD = dyn_cast_or_null(Op)) { + if (MD->getValue() == CalledFunc) { + return InlineResult::failure("inline history"); + } + } + } + } + // The inliner does not know how to inline through calls with operand bundles // in general ... if (CB.hasOperandBundles()) { @@ -2647,7 +2661,8 @@ llvm::InlineResult llvm::CanInlineCallSite(const CallBase &CB, /// function by one level. void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes, AAResults *CalleeAAR, - bool InsertLifetime, Function *ForwardVarArgsTo, + bool InsertLifetime, bool TrackInlineHistory, + Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) { BasicBlock *OrigBB = CB.getParent(); Function *Caller = OrigBB->getParent(); @@ -2768,7 +2783,7 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI, // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo); + InlinedFunctionInfo); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -3269,6 +3284,35 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI, IFI.InlinedCallSites.push_back(CB); } + for (CallBase *ICB : IFI.InlinedCallSites) { + // We only track inline history if requested, or if the inlined call site + // was originally an indirect call (it may have become a direct call + // during inlining). + if (TrackInlineHistory || + InlinedFunctionInfo.OriginallyIndirectCalls.contains(ICB)) { + // !inline_history is {Callee, CB.inline_history, ICB.inline_history}. + // Metadata nodes may be null if the referenced function was erased from + // the module. + SmallVector History; + History.push_back(ValueAsMetadata::get(CalledFunc)); + if (MDNode *CBHistory = CB.getMetadata(LLVMContext::MD_inline_history)) { + for (const auto &Op : CBHistory->operands()) { + if (Op) + History.push_back(Op.get()); + } + } + if (MDNode *CBHistory = + ICB->getMetadata(LLVMContext::MD_inline_history)) { + for (const auto &Op : CBHistory->operands()) { + if (Op) + History.push_back(Op.get()); + } + } + MDNode *NewHistory = MDNode::get(Caller->getContext(), History); + ICB->setMetadata(LLVMContext::MD_inline_history, NewHistory); + } + } + // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. @@ -3474,30 +3518,15 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI, AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); } -llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, - bool MergeAttributes, - AAResults *CalleeAAR, - bool InsertLifetime, - Function *ForwardVarArgsTo, - OptimizationRemarkEmitter *ORE) { +llvm::InlineResult llvm::InlineFunction( + CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes, + AAResults *CalleeAAR, bool InsertLifetime, bool TrackInlineHistory, + Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) { llvm::InlineResult Result = CanInlineCallSite(CB, IFI); if (Result.isSuccess()) { InlineFunctionImpl(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, - ForwardVarArgsTo, ORE); + TrackInlineHistory, ForwardVarArgsTo, ORE); } return Result; } - -bool llvm::inlineHistoryIncludes( - Function *F, int InlineHistoryID, - ArrayRef> InlineHistory) { - while (InlineHistoryID != -1) { - assert(unsigned(InlineHistoryID) < InlineHistory.size() && - "Invalid inline history ID"); - if (InlineHistory[InlineHistoryID].first == F) - return true; - InlineHistoryID = InlineHistory[InlineHistoryID].second; - } - return false; -} diff --git a/llvm/test/Transforms/Inline/debug-invoke.ll b/llvm/test/Transforms/Inline/debug-invoke.ll index 3cf1ca36e774..444363667fa2 100644 --- a/llvm/test/Transforms/Inline/debug-invoke.ll +++ b/llvm/test/Transforms/Inline/debug-invoke.ll @@ -3,7 +3,7 @@ ; Test that the debug location is preserved when rewriting an inlined call as an invoke ; CHECK: invoke void @test() -; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:!.*]] +; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:![0-9]+]] ; CHECK: [[SP:.*]] = distinct !DISubprogram( ; CHECK: [[INL_LOC]] = !DILocation(line: 1, scope: [[SP]], inlinedAt: [[INL_AT:.*]]) ; CHECK: [[INL_AT]] = distinct !DILocation(line: 2, scope: [[SP]]) diff --git a/llvm/test/Transforms/Inline/inline-history-2.ll b/llvm/test/Transforms/Inline/inline-history-2.ll new file mode 100644 index 000000000000..9cb12a352bbc --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-history-2.ll @@ -0,0 +1,25 @@ +; RUN: opt -passes='cgscc(inline,function(instcombine))' -S < %s | FileCheck %s + +; Check that devirtualization of an indirect call to itself doesn't cause +; infinite inlining. Note that we need the devirtualization to happen in +; simplification between inlining runs or else InlineCost will substitute %p +; with @p and see that the call is a recursive call and bail. + +define void @p(ptr %p, i64 %x) { + %a = alloca ptr + store ptr %p, ptr %a + %g = getelementptr i8, ptr %a, i64 %x + %b = load ptr, ptr %g + call void %b(ptr %p, i64 %x) + ret void +} + +define void @q() { +; CHECK-LABEL: define void @q() { +; CHECK-NEXT: call void @p({{.*}}), !inline_history [[HISTORY:![0-9]+]] +; CHECK-NEXT: ret void + call void @p(ptr @p, i64 0) + ret void +} + +; CHECK: [[HISTORY]] = !{ptr @p} diff --git a/llvm/test/Transforms/Inline/inline-history-noinline.ll b/llvm/test/Transforms/Inline/inline-history-noinline.ll deleted file mode 100644 index 742bd25ecd9b..000000000000 --- a/llvm/test/Transforms/Inline/inline-history-noinline.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: opt -passes=inline -S < %s | FileCheck %s - -; This will inline @f1 into @a, causing two new calls to @f2, which will get inlined for two calls to @f1. -; The inline history should stop recursive inlining here, and make sure to mark the inlined calls as noinline so we don't repeat the inlining later on when @a gets inlined into @b. - -define internal void @f1(ptr %p) { - call void %p(ptr @f1) - ret void -} - -define internal void @f2(ptr %p) { - call void %p(ptr @f2) - call void %p(ptr @f2) - ret void -} - -define void @b() { -; CHECK-LABEL: define void @b() { -; CHECK-NEXT: call void @f1(ptr @f2) #[[NOINLINE:[0-9]+]] -; CHECK-NEXT: call void @f1(ptr @f2) #[[NOINLINE]] -; CHECK-NEXT: ret void -; - call void @a() - ret void -} - -define internal void @a() { - call void @f1(ptr @f2) - ret void -} - -; CHECK: [[NOINLINE]] = { noinline } diff --git a/llvm/test/Transforms/Inline/inline-history.ll b/llvm/test/Transforms/Inline/inline-history.ll new file mode 100644 index 000000000000..5f2d67bba1d6 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-history.ll @@ -0,0 +1,102 @@ +; RUN: opt -passes=inline -S < %s | FileCheck %s + +declare ptr @get() +declare ptr @foo1() +declare ptr @foo2() + +; Don't need to track inline history for non-mutually recursive calls. + +define internal void @x() noinline { + ret void +} + +define internal void @y() { + call void @x() + ret void +} + +define void @z() { +; CHECK-LABEL: define void @z() { +; CHECK-NEXT: call void @x(){{$}} +; CHECK-NEXT: ret void + call void @y() + ret void +} + +; Indirect calls may be devirtualized, they need inline history tracking. In +; this case, @s is the history, but it gets deleted so the inline history +; metadata becomes null. This is fine since we cannot infinitely inline through +; @s anymore as it doesn't exist. + +define internal void @s() { + %p = call ptr @get() + call void %p() + ret void +} + +define void @t() { +; CHECK-LABEL: define void @t() { +; CHECK-NEXT: %p.i = call ptr @get() +; CHECK-NEXT: call void %p.i(), !inline_history [[HISTORY_T:![0-9]+]] +; CHECK-NEXT: ret void + call void @s() + ret void +} + +; This will inline @f1 into @a, causing two new calls to @f2, which will get inlined for two calls to @f1. +; The inline history should stop recursive inlining here. + +define internal void @f1(ptr %p) { + call void %p(ptr @f1) + ret void +} + +define internal void @f2(ptr %p) { + call void %p(ptr @f2) + call void %p(ptr @f2) + ret void +} + +define void @b() { +; CHECK-LABEL: define void @b() { +; CHECK-NEXT: call void @f1(ptr @f2), !inline_history [[HISTORY_B:![0-9]+]] +; CHECK-NEXT: call void @f1(ptr @f2), !inline_history [[HISTORY_B:![0-9]+]] +; CHECK-NEXT: ret void +; + call void @a() + ret void +} + +define internal void @a() { + call void @f1(ptr @f2) + ret void +} + +; Check that the inline history is +; {callee, processed call's inline_history, just-inlined call's inline_history} + +define void @m(ptr %p) noinline { + ret void +} + +define void @n() { + call void @m(ptr @n2), !inline_history !{ptr @foo1} + ret void +} + +define void @n2() { + call void @m(ptr @n) + ret void +} + +define void @o() { +; CHECK-LABEL: define void @o() { +; CHECK-NEXT: call void @m(ptr @n2), !inline_history [[HISTORY_O:![0-9]+]] +; CHECK-NEXT: ret void + call void @n(), !inline_history !{ptr @foo2} + ret void +} + +; CHECK-DAG: [[HISTORY_T]] = distinct !{null} +; CHECK-DAG: [[HISTORY_B]] = !{ptr @f2, ptr @f1} +; CHECK-DAG: [[HISTORY_O]] = !{ptr @n, ptr @foo2, ptr @foo1} diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll index 52d7b21902e8..27987fc845d5 100644 --- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll +++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll @@ -14,7 +14,7 @@ ; CHECK: Size after inlining: 17 ; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i) ; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test.i = tail call float @test(float %x, float %call.i) -; CHECK: Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor +; CHECK: NOT Inlining (cost=never): recursive, Call: %call.i = tail call float @inline_rec_true_successor(float %x, float %scale) ; CHECK: Updated inlining SCC: (test, inline_rec_true_successor) ; CHECK: Inlining calls in: test diff --git a/llvm/test/Verifier/inline-history-metadata.ll b/llvm/test/Verifier/inline-history-metadata.ll new file mode 100644 index 000000000000..4a56d042859f --- /dev/null +++ b/llvm/test/Verifier/inline-history-metadata.ll @@ -0,0 +1,55 @@ +; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s + +@x = global i32 0 + +define void @f() { + ret void +} + +; CHECK: !inline_history should only exist on calls +define void @wrong_instr(ptr %x) { + load ptr, ptr %x, !inline_history !{ptr @wrong_instr} + ret void +} + +; CHECK: !inline_history operands must be functions or null +define void @global_value_operand() { + call void @f(), !inline_history !{ptr @x} + ret void +} + +; CHECK: !inline_history operands must be functions or null +define void @metadata_operand() { + call void @f(), !inline_history !{!0} + ret void +} + +; CHECK: !inline_history operands must be functions or null +define void @nullptr_operand() { + call void @f(), !inline_history !{ptr null} + ret void +} + +; CHECK-NOT: !inline_history operands must be functions or null + +define void @empty_metadata() { + call void @f(), !inline_history !{} + ret void +} + +define void @null_metadata() { + call void @f(), !inline_history !{null} + ret void +} + +define void @function_metadata() { + call void @f(), !inline_history !{ptr @f} + ret void +} + +define void @mixed_metadata() { + call void @f(), !inline_history !{null, ptr @f, null, ptr @mixed_metadata} + ret void +} + +!0 = !{} diff --git a/llvm/unittests/Analysis/InlineOrderPlugin/InlineOrderPlugin.cpp b/llvm/unittests/Analysis/InlineOrderPlugin/InlineOrderPlugin.cpp index 6fe54c24d70c..a263edc3f6f4 100644 --- a/llvm/unittests/Analysis/InlineOrderPlugin/InlineOrderPlugin.cpp +++ b/llvm/unittests/Analysis/InlineOrderPlugin/InlineOrderPlugin.cpp @@ -12,31 +12,29 @@ using namespace llvm; namespace { -class NoFooInlineOrder : public InlineOrder> { +class NoFooInlineOrder : public InlineOrder { public: NoFooInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params, ModuleAnalysisManager &MAM, Module &M) { DefaultInlineOrder = getDefaultInlineOrder(FAM, Params, MAM, M); } size_t size() override { return DefaultInlineOrder->size(); } - void push(const std::pair &Elt) override { - // We ignore calles named "foo" - if (Elt.first->getCalledFunction()->getName() == "foo") { + void push(CallBase *Elt) override { + // We ignore callees named "foo" + if (Elt->getCalledFunction()->getName() == "foo") { DefaultInlineOrder->push(Elt); } } - std::pair pop() override { - return DefaultInlineOrder->pop(); - } - void erase_if(function_ref)> Pred) override { + CallBase *pop() override { return DefaultInlineOrder->pop(); } + void erase_if(function_ref Pred) override { DefaultInlineOrder->erase_if(Pred); } private: - std::unique_ptr>> DefaultInlineOrder; + std::unique_ptr DefaultInlineOrder; }; -std::unique_ptr>> +std::unique_ptr NoFooInlineOrderFactory(FunctionAnalysisManager &FAM, const InlineParams &Params, ModuleAnalysisManager &MAM, Module &M) {