
This change enables llvm-profgen to use accurate context-sensitive post-optimization function byte size as a cost proxy to drive global preinline decisions. To do this, BinarySizeContextTracker is introduced to track function byte size under different inline contexts during disassembling. In the preinliner, we can now query context byte size under the switch `context-cost-for-preinliner`. The tracker uses a reverse trie to keep the size of functions under different contexts (callee as parent, caller as child), and it can give the best/longest possible matching context size for a given input context. The new size cost is off by default. There are a few TODOs that need to be addressed: 1) avoid dangling strings from `Offset2LocStackMap`, which will be addressed in the split context work; 2) use the inlinee's entry probe to make sure we have the correct zero size for an inlinee that is completely optimized away after inlining. Some tuning is also needed. Differential Revision: https://reviews.llvm.org/D108180
96 lines
3.6 KiB
C++
96 lines
3.6 KiB
C++
//===-- CSPreInliner.h - Profile guided preinliner ---------------- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
|
|
#define LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H
|
|
|
|
#include "ProfiledBinary.h"
|
|
#include "llvm/ADT/PriorityQueue.h"
|
|
#include "llvm/ProfileData/ProfileCommon.h"
|
|
#include "llvm/ProfileData/SampleProf.h"
|
|
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
|
|
#include "llvm/Transforms/IPO/SampleContextTracker.h"
|
|
|
|
using namespace llvm;
|
|
using namespace sampleprof;
|
|
|
|
namespace llvm {
|
|
namespace sampleprof {
|
|
|
|
// Inline candidate seen from profile
|
|
struct ProfiledInlineCandidate {
|
|
ProfiledInlineCandidate(const FunctionSamples *Samples, uint64_t Count,
|
|
uint32_t Size)
|
|
: CalleeSamples(Samples), CallsiteCount(Count), SizeCost(Size) {}
|
|
// Context-sensitive function profile for inline candidate
|
|
const FunctionSamples *CalleeSamples;
|
|
// Call site count for an inline candidate
|
|
// TODO: make sure entry count for context profile and call site
|
|
// target count for corresponding call are consistent.
|
|
uint64_t CallsiteCount;
|
|
// Size proxy for function under particular call context.
|
|
uint64_t SizeCost;
|
|
};
|
|
|
|
// Inline candidate comparer using call site weight
|
|
struct ProfiledCandidateComparer {
|
|
bool operator()(const ProfiledInlineCandidate &LHS,
|
|
const ProfiledInlineCandidate &RHS) {
|
|
if (LHS.CallsiteCount != RHS.CallsiteCount)
|
|
return LHS.CallsiteCount < RHS.CallsiteCount;
|
|
|
|
if (LHS.SizeCost != RHS.SizeCost)
|
|
return LHS.SizeCost > RHS.SizeCost;
|
|
|
|
// Tie breaker using GUID so we have stable/deterministic inlining order
|
|
assert(LHS.CalleeSamples && RHS.CalleeSamples &&
|
|
"Expect non-null FunctionSamples");
|
|
return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
|
|
RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
|
|
}
|
|
};
|
|
|
|
using ProfiledCandidateQueue =
|
|
PriorityQueue<ProfiledInlineCandidate, std::vector<ProfiledInlineCandidate>,
|
|
ProfiledCandidateComparer>;
|
|
|
|
// Pre-compilation inliner based on context-sensitive profile.
|
|
// The PreInliner estimates inline decision using hotness from profile
|
|
// and cost estimation from machine code size. It helps merges context
|
|
// profile globally and achieves better post-inine profile quality, which
|
|
// otherwise won't be possible for ThinLTO. It also reduce context profile
|
|
// size by only keep context that is estimated to be inlined.
|
|
class CSPreInliner {
|
|
public:
|
|
CSPreInliner(StringMap<FunctionSamples> &Profiles, ProfiledBinary &Binary,
|
|
uint64_t HotThreshold, uint64_t ColdThreshold);
|
|
void run();
|
|
|
|
private:
|
|
bool getInlineCandidates(ProfiledCandidateQueue &CQueue,
|
|
const FunctionSamples *FCallerContextSamples);
|
|
std::vector<StringRef> buildTopDownOrder();
|
|
void processFunction(StringRef Name);
|
|
bool shouldInline(ProfiledInlineCandidate &Candidate);
|
|
uint32_t getFuncSize(const FunctionSamples &FSamples);
|
|
bool UseContextCost;
|
|
SampleContextTracker ContextTracker;
|
|
StringMap<FunctionSamples> &ProfileMap;
|
|
ProfiledBinary &Binary;
|
|
|
|
// Count thresholds to answer isHotCount and isColdCount queries.
|
|
// Mirrors the threshold in ProfileSummaryInfo.
|
|
uint64_t HotCountThreshold;
|
|
uint64_t ColdCountThreshold;
|
|
};
|
|
|
|
} // end namespace sampleprof
|
|
} // end namespace llvm
|
|
|
|
#endif
|