
Currently the backing buffer of a `std::vector<char>` is passed[1] to `Demangler.getFunctionBaseName`. However, deeply inside the call stack `OutputBuffer::grow` will call[2] `std::realloc` if it needs to grow the buffer, leading to UB. The demangler APIs specify[3] that "`Buf` and `N` behave like the second and third parameters to `__cxa_demangle`" and the docs for the latter say[4] that the output buffer must be allocated with `malloc` (but can also be `NULL` and will then be realloced accordingly). Note: PR #135863 changed this from a stack array to a `std::vector` and increased the size to 65K, but this can still lead to a crash if the demangled name is longer than that - yes, I'm surprised that a >65K-long function name happens in practice... [1]:d7e631c7cd/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp (L744)
[2]:d7e631c7cd/llvm/include/llvm/Demangle/Utility.h (L50)
[3]:d7e631c7cd/llvm/include/llvm/Demangle/Demangle.h (L92-L93)
[4]: https://gcc.gnu.org/onlinedocs/libstdc++/libstdc++-html-USERS-4.3/a01696.html
928 lines
37 KiB
C++
928 lines
37 KiB
C++
//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the SampleProfileMatcher used for stale
|
|
// profile matching.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
|
|
#include "llvm/Demangle/Demangle.h"
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
#include "llvm/IR/MDBuilder.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Transforms/Utils/LongestCommonSequence.h"
|
|
|
|
using namespace llvm;
|
|
using namespace sampleprof;
|
|
|
|
#define DEBUG_TYPE "sample-profile-matcher"
|
|
|
|
static cl::opt<unsigned> FuncProfileSimilarityThreshold(
|
|
"func-profile-similarity-threshold", cl::Hidden, cl::init(80),
|
|
cl::desc("Consider a profile matches a function if the similarity of their "
|
|
"callee sequences is above the specified percentile."));
|
|
|
|
static cl::opt<unsigned> MinFuncCountForCGMatching(
|
|
"min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
|
|
cl::desc("The minimum number of basic blocks required for a function to "
|
|
"run stale profile call graph matching."));
|
|
|
|
static cl::opt<unsigned> MinCallCountForCGMatching(
|
|
"min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
|
|
cl::desc("The minimum number of call anchors required for a function to "
|
|
"run stale profile call graph matching."));
|
|
|
|
static cl::opt<bool> LoadFuncProfileforCGMatching(
|
|
"load-func-profile-for-cg-matching", cl::Hidden, cl::init(true),
|
|
cl::desc(
|
|
"Load top-level profiles that the sample reader initially skipped for "
|
|
"the call-graph matching (only meaningful for extended binary "
|
|
"format)"));
|
|
|
|
extern cl::opt<bool> SalvageStaleProfile;
|
|
extern cl::opt<bool> SalvageUnusedProfile;
|
|
extern cl::opt<bool> PersistProfileStaleness;
|
|
extern cl::opt<bool> ReportProfileStaleness;
|
|
|
|
static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
|
|
"salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
|
|
cl::desc("The maximum number of callsites in a function, above which stale "
|
|
"profile matching will be skipped."));
|
|
|
|
void SampleProfileMatcher::findIRAnchors(const Function &F,
|
|
AnchorMap &IRAnchors) const {
|
|
// For inlined code, recover the original callsite and callee by finding the
|
|
// top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
|
|
// top-level frame is "main:1", the callsite is "1" and the callee is "foo".
|
|
auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
|
|
assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
|
|
const DILocation *PrevDIL = nullptr;
|
|
do {
|
|
PrevDIL = DIL;
|
|
DIL = DIL->getInlinedAt();
|
|
} while (DIL->getInlinedAt());
|
|
|
|
LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
|
|
DIL, FunctionSamples::ProfileIsFS);
|
|
StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
|
|
return std::make_pair(Callsite, FunctionId(CalleeName));
|
|
};
|
|
|
|
auto GetCanonicalCalleeName = [](const CallBase *CB) {
|
|
StringRef CalleeName = UnknownIndirectCallee;
|
|
if (Function *Callee = CB->getCalledFunction())
|
|
CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
|
|
return CalleeName;
|
|
};
|
|
|
|
// Extract profile matching anchors in the IR.
|
|
for (auto &BB : F) {
|
|
for (auto &I : BB) {
|
|
DILocation *DIL = I.getDebugLoc();
|
|
if (!DIL)
|
|
continue;
|
|
|
|
if (FunctionSamples::ProfileIsProbeBased) {
|
|
if (auto Probe = extractProbe(I)) {
|
|
// Flatten inlined IR for the matching.
|
|
if (DIL->getInlinedAt()) {
|
|
IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
|
|
} else {
|
|
// Use empty StringRef for basic block probe.
|
|
StringRef CalleeName;
|
|
if (const auto *CB = dyn_cast<CallBase>(&I)) {
|
|
// Skip the probe inst whose callee name is "llvm.pseudoprobe".
|
|
if (!isa<IntrinsicInst>(&I))
|
|
CalleeName = GetCanonicalCalleeName(CB);
|
|
}
|
|
LineLocation Loc = LineLocation(Probe->Id, 0);
|
|
IRAnchors.emplace(Loc, FunctionId(CalleeName));
|
|
}
|
|
}
|
|
} else {
|
|
// TODO: For line-number based profile(AutoFDO), currently only support
|
|
// find callsite anchors. In future, we need to parse all the non-call
|
|
// instructions to extract the line locations for profile matching.
|
|
if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
|
|
continue;
|
|
|
|
if (DIL->getInlinedAt()) {
|
|
IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
|
|
} else {
|
|
LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
|
|
DIL, FunctionSamples::ProfileIsFS);
|
|
StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
|
|
IRAnchors.emplace(Callsite, FunctionId(CalleeName));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
|
|
AnchorMap &ProfileAnchors) const {
|
|
auto isInvalidLineOffset = [](uint32_t LineOffset) {
|
|
return LineOffset & 0x8000;
|
|
};
|
|
|
|
auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
|
|
AnchorMap &ProfileAnchors) {
|
|
auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
|
|
if (!Ret.second) {
|
|
// For multiple callees, which indicates it's an indirect call, we use a
|
|
// dummy name(UnknownIndirectCallee) as the indrect callee name.
|
|
Ret.first->second = FunctionId(UnknownIndirectCallee);
|
|
}
|
|
};
|
|
|
|
for (const auto &I : FS.getBodySamples()) {
|
|
const LineLocation &Loc = I.first;
|
|
if (isInvalidLineOffset(Loc.LineOffset))
|
|
continue;
|
|
for (const auto &C : I.second.getCallTargets())
|
|
InsertAnchor(Loc, C.first, ProfileAnchors);
|
|
}
|
|
|
|
for (const auto &I : FS.getCallsiteSamples()) {
|
|
const LineLocation &Loc = I.first;
|
|
if (isInvalidLineOffset(Loc.LineOffset))
|
|
continue;
|
|
for (const auto &C : I.second)
|
|
InsertAnchor(Loc, C.first, ProfileAnchors);
|
|
}
|
|
}
|
|
|
|
bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
|
|
Function *&FuncWithoutProfile) {
|
|
FuncWithoutProfile = nullptr;
|
|
auto R = FunctionsWithoutProfile.find(IRFuncName);
|
|
if (R != FunctionsWithoutProfile.end())
|
|
FuncWithoutProfile = R->second;
|
|
return !FuncWithoutProfile;
|
|
}
|
|
|
|
bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
|
|
return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
|
|
}
|
|
|
|
bool SampleProfileMatcher::functionMatchesProfile(
|
|
const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
|
|
bool FindMatchedProfileOnly) {
|
|
if (IRFuncName == ProfileFuncName)
|
|
return true;
|
|
if (!SalvageUnusedProfile)
|
|
return false;
|
|
|
|
// If IR function doesn't have profile and the profile is unused, try
|
|
// matching them.
|
|
Function *IRFunc = nullptr;
|
|
if (functionHasProfile(IRFuncName, IRFunc) ||
|
|
!isProfileUnused(ProfileFuncName))
|
|
return false;
|
|
|
|
assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
|
|
"IR function should be different from profile function to match");
|
|
return functionMatchesProfile(*IRFunc, ProfileFuncName,
|
|
FindMatchedProfileOnly);
|
|
}
|
|
|
|
LocToLocMap
|
|
SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
|
|
const AnchorList &AnchorList2,
|
|
bool MatchUnusedFunction) {
|
|
LocToLocMap MatchedAnchors;
|
|
llvm::longestCommonSequence<LineLocation, FunctionId>(
|
|
AnchorList1, AnchorList2,
|
|
[&](const FunctionId &A, const FunctionId &B) {
|
|
return functionMatchesProfile(
|
|
A, B,
|
|
!MatchUnusedFunction // Find matched function only
|
|
);
|
|
},
|
|
[&](LineLocation A, LineLocation B) {
|
|
MatchedAnchors.try_emplace(A, B);
|
|
});
|
|
return MatchedAnchors;
|
|
}
|
|
|
|
void SampleProfileMatcher::matchNonCallsiteLocs(
|
|
const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
|
|
LocToLocMap &IRToProfileLocationMap) {
|
|
auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
|
|
// Skip the unchanged location mapping to save memory.
|
|
if (From != To)
|
|
IRToProfileLocationMap.insert({From, To});
|
|
};
|
|
|
|
// Use function's beginning location as the initial anchor.
|
|
int32_t LocationDelta = 0;
|
|
SmallVector<LineLocation> LastMatchedNonAnchors;
|
|
for (const auto &IR : IRAnchors) {
|
|
const auto &Loc = IR.first;
|
|
bool IsMatchedAnchor = false;
|
|
// Match the anchor location in lexical order.
|
|
auto R = MatchedAnchors.find(Loc);
|
|
if (R != MatchedAnchors.end()) {
|
|
const auto &Candidate = R->second;
|
|
InsertMatching(Loc, Candidate);
|
|
LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
|
|
<< " is matched from " << Loc << " to " << Candidate
|
|
<< "\n");
|
|
LocationDelta = Candidate.LineOffset - Loc.LineOffset;
|
|
|
|
// Match backwards for non-anchor locations.
|
|
// The locations in LastMatchedNonAnchors have been matched forwards
|
|
// based on the previous anchor, spilt it evenly and overwrite the
|
|
// second half based on the current anchor.
|
|
for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
|
|
I < LastMatchedNonAnchors.size(); I++) {
|
|
const auto &L = LastMatchedNonAnchors[I];
|
|
uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
|
|
LineLocation Candidate(CandidateLineOffset, L.Discriminator);
|
|
InsertMatching(L, Candidate);
|
|
LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
|
|
<< " to " << Candidate << "\n");
|
|
}
|
|
|
|
IsMatchedAnchor = true;
|
|
LastMatchedNonAnchors.clear();
|
|
}
|
|
|
|
// Match forwards for non-anchor locations.
|
|
if (!IsMatchedAnchor) {
|
|
uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
|
|
LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
|
|
InsertMatching(Loc, Candidate);
|
|
LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
|
|
<< Candidate << "\n");
|
|
LastMatchedNonAnchors.emplace_back(Loc);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Filter the non-call locations from IRAnchors and ProfileAnchors and write
|
|
// them into a list for random access later.
|
|
void SampleProfileMatcher::getFilteredAnchorList(
|
|
const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
|
|
AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
|
|
for (const auto &I : IRAnchors) {
|
|
if (I.second.stringRef().empty())
|
|
continue;
|
|
FilteredIRAnchorsList.emplace_back(I);
|
|
}
|
|
|
|
for (const auto &I : ProfileAnchors)
|
|
FilteredProfileAnchorList.emplace_back(I);
|
|
}
|
|
|
|
// Call target name anchor based profile fuzzy matching.
|
|
// Input:
|
|
// For IR locations, the anchor is the callee name of direct callsite; For
|
|
// profile locations, it's the call target name for BodySamples or inlinee's
|
|
// profile name for CallsiteSamples.
|
|
// Matching heuristic:
|
|
// First match all the anchors using the diff algorithm, then split the
|
|
// non-anchor locations between the two anchors evenly, first half are matched
|
|
// based on the start anchor, second half are matched based on the end anchor.
|
|
// For example, given:
|
|
// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
|
|
// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
|
|
// The matching gives:
|
|
// [1, 2(foo), 3, 5, 6(bar), 7]
|
|
// | | | | | |
|
|
// [1, 2, 3(foo), 4, 7, 8(bar), 9]
|
|
// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
|
|
void SampleProfileMatcher::runStaleProfileMatching(
|
|
const Function &F, const AnchorMap &IRAnchors,
|
|
const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
|
|
bool RunCFGMatching, bool RunCGMatching) {
|
|
if (!RunCFGMatching && !RunCGMatching)
|
|
return;
|
|
LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
|
|
<< "\n");
|
|
assert(IRToProfileLocationMap.empty() &&
|
|
"Run stale profile matching only once per function");
|
|
|
|
AnchorList FilteredProfileAnchorList;
|
|
AnchorList FilteredIRAnchorsList;
|
|
getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
|
|
FilteredProfileAnchorList);
|
|
|
|
if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
|
|
return;
|
|
|
|
if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
|
|
FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
|
|
LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
|
|
<< " because the number of callsites in the IR is "
|
|
<< FilteredIRAnchorsList.size()
|
|
<< " and in the profile is "
|
|
<< FilteredProfileAnchorList.size() << "\n");
|
|
return;
|
|
}
|
|
|
|
// Match the callsite anchors by finding the longest common subsequence
|
|
// between IR and profile.
|
|
// Define a match between two anchors as follows:
|
|
// 1) The function names of anchors are the same.
|
|
// 2) The similarity between the anchor functions is above a threshold if
|
|
// RunCGMatching is set.
|
|
// For 2), we only consider the anchor functions from IR and profile don't
|
|
// appear on either side to reduce the matching scope. Note that we need to
|
|
// use IR anchor as base(A side) to align with the order of
|
|
// IRToProfileLocationMap.
|
|
LocToLocMap MatchedAnchors =
|
|
longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
|
|
RunCGMatching /* Match unused functions */);
|
|
|
|
// CFG level matching:
|
|
// Apply the callsite matchings to infer matching for the basic
|
|
// block(non-callsite) locations and write the result to
|
|
// IRToProfileLocationMap.
|
|
if (RunCFGMatching)
|
|
matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
|
|
}
|
|
|
|
void SampleProfileMatcher::runOnFunction(Function &F) {
|
|
// We need to use flattened function samples for matching.
|
|
// Unlike IR, which includes all callsites from the source code, the callsites
|
|
// in profile only show up when they are hit by samples, i,e. the profile
|
|
// callsites in one context may differ from those in another context. To get
|
|
// the maximum number of callsites, we merge the function profiles from all
|
|
// contexts, aka, the flattened profile to find profile anchors.
|
|
const auto *FSForMatching = getFlattenedSamplesFor(F);
|
|
if (SalvageUnusedProfile && !FSForMatching) {
|
|
// Apply the matching in place to find the new function's matched profile.
|
|
auto R = FuncToProfileNameMap.find(&F);
|
|
if (R != FuncToProfileNameMap.end()) {
|
|
FSForMatching = getFlattenedSamplesFor(R->second);
|
|
// Try to find the salvaged top-level profiles that are explicitly loaded
|
|
// for the matching, see "functionMatchesProfileHelper" for the details.
|
|
if (!FSForMatching && LoadFuncProfileforCGMatching)
|
|
FSForMatching = Reader.getSamplesFor(R->second.stringRef());
|
|
}
|
|
}
|
|
if (!FSForMatching)
|
|
return;
|
|
|
|
// Anchors for IR. It's a map from IR location to callee name, callee name is
|
|
// empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
|
|
// for unknown indrect callee name.
|
|
AnchorMap IRAnchors;
|
|
findIRAnchors(F, IRAnchors);
|
|
// Anchors for profile. It's a map from callsite location to a set of callee
|
|
// name.
|
|
AnchorMap ProfileAnchors;
|
|
findProfileAnchors(*FSForMatching, ProfileAnchors);
|
|
|
|
// Compute the callsite match states for profile staleness report.
|
|
if (ReportProfileStaleness || PersistProfileStaleness)
|
|
recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
|
|
|
|
if (!SalvageStaleProfile)
|
|
return;
|
|
// For probe-based profiles, run matching only when profile checksum is
|
|
// mismatched.
|
|
bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
|
|
!ProbeManager->profileIsValid(F, *FSForMatching);
|
|
bool RunCFGMatching =
|
|
!FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
|
|
bool RunCGMatching = SalvageUnusedProfile;
|
|
// For imported functions, the checksum metadata(pseudo_probe_desc) are
|
|
// dropped, so we leverage function attribute(profile-checksum-mismatch) to
|
|
// transfer the info: add the attribute during pre-link phase and check it
|
|
// during post-link phase(see "profileIsValid").
|
|
if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
|
|
F.addFnAttr("profile-checksum-mismatch");
|
|
|
|
// The matching result will be saved to IRToProfileLocationMap, create a
|
|
// new map for each function.
|
|
auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
|
|
runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
|
|
RunCFGMatching, RunCGMatching);
|
|
// Find and update callsite match states after matching.
|
|
if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
|
|
recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
|
|
&IRToProfileLocationMap);
|
|
}
|
|
|
|
void SampleProfileMatcher::recordCallsiteMatchStates(
|
|
const Function &F, const AnchorMap &IRAnchors,
|
|
const AnchorMap &ProfileAnchors,
|
|
const LocToLocMap *IRToProfileLocationMap) {
|
|
bool IsPostMatch = IRToProfileLocationMap != nullptr;
|
|
auto &CallsiteMatchStates =
|
|
FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
|
|
|
|
auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
|
|
// IRToProfileLocationMap is null in pre-match phrase.
|
|
if (!IRToProfileLocationMap)
|
|
return IRLoc;
|
|
const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
|
|
if (ProfileLoc != IRToProfileLocationMap->end())
|
|
return ProfileLoc->second;
|
|
else
|
|
return IRLoc;
|
|
};
|
|
|
|
for (const auto &I : IRAnchors) {
|
|
// After fuzzy profile matching, use the matching result to remap the
|
|
// current IR callsite.
|
|
const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
|
|
const auto &IRCalleeId = I.second;
|
|
const auto &It = ProfileAnchors.find(ProfileLoc);
|
|
if (It == ProfileAnchors.end())
|
|
continue;
|
|
const auto &ProfCalleeId = It->second;
|
|
if (IRCalleeId == ProfCalleeId) {
|
|
auto It = CallsiteMatchStates.find(ProfileLoc);
|
|
if (It == CallsiteMatchStates.end())
|
|
CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
|
|
else if (IsPostMatch) {
|
|
if (It->second == MatchState::InitialMatch)
|
|
It->second = MatchState::UnchangedMatch;
|
|
else if (It->second == MatchState::InitialMismatch)
|
|
It->second = MatchState::RecoveredMismatch;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if there are any callsites in the profile that does not match to any
|
|
// IR callsites.
|
|
for (const auto &I : ProfileAnchors) {
|
|
const auto &Loc = I.first;
|
|
assert(!I.second.stringRef().empty() && "Callees should not be empty");
|
|
auto It = CallsiteMatchStates.find(Loc);
|
|
if (It == CallsiteMatchStates.end())
|
|
CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
|
|
else if (IsPostMatch) {
|
|
// Update the state if it's not matched(UnchangedMatch or
|
|
// RecoveredMismatch).
|
|
if (It->second == MatchState::InitialMismatch)
|
|
It->second = MatchState::UnchangedMismatch;
|
|
else if (It->second == MatchState::InitialMatch)
|
|
It->second = MatchState::RemovedMatch;
|
|
}
|
|
}
|
|
}
|
|
|
|
void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
|
|
bool IsTopLevel) {
|
|
const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
|
|
// Skip the function that is external or renamed.
|
|
if (!FuncDesc)
|
|
return;
|
|
|
|
if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
|
|
if (IsTopLevel)
|
|
NumStaleProfileFunc++;
|
|
// Given currently all probe ids are after block probe ids, once the
|
|
// checksum is mismatched, it's likely all the callites are mismatched and
|
|
// dropped. We conservatively count all the samples as mismatched and stop
|
|
// counting the inlinees' profiles.
|
|
MismatchedFunctionSamples += FS.getTotalSamples();
|
|
return;
|
|
}
|
|
|
|
// Even the current-level function checksum is matched, it's possible that the
|
|
// nested inlinees' checksums are mismatched that affect the inlinee's sample
|
|
// loading, we need to go deeper to check the inlinees' function samples.
|
|
// Similarly, count all the samples as mismatched if the inlinee's checksum is
|
|
// mismatched using this recursive function.
|
|
for (const auto &I : FS.getCallsiteSamples())
|
|
for (const auto &CS : I.second)
|
|
countMismatchedFuncSamples(CS.second, false);
|
|
}
|
|
|
|
void SampleProfileMatcher::countMismatchedCallsiteSamples(
|
|
const FunctionSamples &FS) {
|
|
auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
|
|
// Skip it if no mismatched callsite or this is an external function.
|
|
if (It == FuncCallsiteMatchStates.end() || It->second.empty())
|
|
return;
|
|
const auto &CallsiteMatchStates = It->second;
|
|
|
|
auto findMatchState = [&](const LineLocation &Loc) {
|
|
auto It = CallsiteMatchStates.find(Loc);
|
|
if (It == CallsiteMatchStates.end())
|
|
return MatchState::Unknown;
|
|
return It->second;
|
|
};
|
|
|
|
auto AttributeMismatchedSamples = [&](const enum MatchState &State,
|
|
uint64_t Samples) {
|
|
if (isMismatchState(State))
|
|
MismatchedCallsiteSamples += Samples;
|
|
else if (State == MatchState::RecoveredMismatch)
|
|
RecoveredCallsiteSamples += Samples;
|
|
};
|
|
|
|
// The non-inlined callsites are saved in the body samples of function
|
|
// profile, go through it to count the non-inlined callsite samples.
|
|
for (const auto &I : FS.getBodySamples())
|
|
AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
|
|
|
|
// Count the inlined callsite samples.
|
|
for (const auto &I : FS.getCallsiteSamples()) {
|
|
auto State = findMatchState(I.first);
|
|
uint64_t CallsiteSamples = 0;
|
|
for (const auto &CS : I.second)
|
|
CallsiteSamples += CS.second.getTotalSamples();
|
|
AttributeMismatchedSamples(State, CallsiteSamples);
|
|
|
|
if (isMismatchState(State))
|
|
continue;
|
|
|
|
// When the current level of inlined call site matches the profiled call
|
|
// site, we need to go deeper along the inline tree to count mismatches from
|
|
// lower level inlinees.
|
|
for (const auto &CS : I.second)
|
|
countMismatchedCallsiteSamples(CS.second);
|
|
}
|
|
}
|
|
|
|
void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
|
|
auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
|
|
// Skip it if no mismatched callsite or this is an external function.
|
|
if (It == FuncCallsiteMatchStates.end() || It->second.empty())
|
|
return;
|
|
const auto &MatchStates = It->second;
|
|
[[maybe_unused]] bool OnInitialState =
|
|
isInitialState(MatchStates.begin()->second);
|
|
for (const auto &I : MatchStates) {
|
|
TotalProfiledCallsites++;
|
|
assert(
|
|
(OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
|
|
"Profile matching state is inconsistent");
|
|
|
|
if (isMismatchState(I.second))
|
|
NumMismatchedCallsites++;
|
|
else if (I.second == MatchState::RecoveredMismatch)
|
|
NumRecoveredCallsites++;
|
|
}
|
|
}
|
|
|
|
void SampleProfileMatcher::countCallGraphRecoveredSamples(
|
|
const FunctionSamples &FS,
|
|
std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
|
|
if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
|
|
NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
|
|
return;
|
|
}
|
|
|
|
for (const auto &CM : FS.getCallsiteSamples()) {
|
|
for (const auto &CS : CM.second) {
|
|
countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
|
|
}
|
|
}
|
|
}
|
|
|
|
void SampleProfileMatcher::computeAndReportProfileStaleness() {
|
|
if (!ReportProfileStaleness && !PersistProfileStaleness)
|
|
return;
|
|
|
|
std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
|
|
if (SalvageUnusedProfile) {
|
|
for (const auto &I : FuncToProfileNameMap) {
|
|
CallGraphRecoveredProfiles.insert(I.second);
|
|
if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
|
|
continue;
|
|
NumCallGraphRecoveredProfiledFunc++;
|
|
}
|
|
}
|
|
|
|
// Count profile mismatches for profile staleness report.
|
|
for (const auto &F : M) {
|
|
if (skipProfileForFunction(F))
|
|
continue;
|
|
// As the stats will be merged by linker, skip reporting the metrics for
|
|
// imported functions to avoid repeated counting.
|
|
if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
|
|
continue;
|
|
const auto *FS = Reader.getSamplesFor(F);
|
|
if (!FS)
|
|
continue;
|
|
TotalProfiledFunc++;
|
|
TotalFunctionSamples += FS->getTotalSamples();
|
|
|
|
if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
|
|
countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
|
|
|
|
// Checksum mismatch is only used in pseudo-probe mode.
|
|
if (FunctionSamples::ProfileIsProbeBased)
|
|
countMismatchedFuncSamples(*FS, true);
|
|
|
|
// Count mismatches and samples for calliste.
|
|
countMismatchCallsites(*FS);
|
|
countMismatchedCallsiteSamples(*FS);
|
|
}
|
|
|
|
if (ReportProfileStaleness) {
|
|
if (FunctionSamples::ProfileIsProbeBased) {
|
|
errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
|
|
<< ") of functions' profile are invalid and ("
|
|
<< MismatchedFunctionSamples << "/" << TotalFunctionSamples
|
|
<< ") of samples are discarded due to function hash mismatch.\n";
|
|
}
|
|
if (SalvageUnusedProfile) {
|
|
errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
|
|
<< TotalProfiledFunc << ") of functions' profile are matched and ("
|
|
<< NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
|
|
<< ") of samples are reused by call graph matching.\n";
|
|
}
|
|
|
|
errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
|
|
<< TotalProfiledCallsites
|
|
<< ") of callsites' profile are invalid and ("
|
|
<< (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
|
|
<< TotalFunctionSamples
|
|
<< ") of samples are discarded due to callsite location mismatch.\n";
|
|
errs() << "(" << NumRecoveredCallsites << "/"
|
|
<< (NumRecoveredCallsites + NumMismatchedCallsites)
|
|
<< ") of callsites and (" << RecoveredCallsiteSamples << "/"
|
|
<< (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
|
|
<< ") of samples are recovered by stale profile matching.\n";
|
|
}
|
|
|
|
if (PersistProfileStaleness) {
|
|
LLVMContext &Ctx = M.getContext();
|
|
MDBuilder MDB(Ctx);
|
|
|
|
SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
|
|
if (FunctionSamples::ProfileIsProbeBased) {
|
|
ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
|
|
ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
|
|
ProfStatsVec.emplace_back("MismatchedFunctionSamples",
|
|
MismatchedFunctionSamples);
|
|
ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
|
|
}
|
|
|
|
if (SalvageUnusedProfile) {
|
|
ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
|
|
NumCallGraphRecoveredProfiledFunc);
|
|
ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
|
|
NumCallGraphRecoveredFuncSamples);
|
|
}
|
|
|
|
ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
|
|
ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
|
|
ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
|
|
ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
|
|
MismatchedCallsiteSamples);
|
|
ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
|
|
RecoveredCallsiteSamples);
|
|
|
|
auto *MD = MDB.createLLVMStats(ProfStatsVec);
|
|
auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
|
|
NMD->addOperand(MD);
|
|
}
|
|
}
|
|
|
|
void SampleProfileMatcher::findFunctionsWithoutProfile() {
|
|
// TODO: Support MD5 profile.
|
|
if (FunctionSamples::UseMD5)
|
|
return;
|
|
StringSet<> NamesInProfile;
|
|
if (auto NameTable = Reader.getNameTable()) {
|
|
for (auto Name : *NameTable)
|
|
NamesInProfile.insert(Name.stringRef());
|
|
}
|
|
|
|
for (auto &F : M) {
|
|
// Skip declarations, as even if the function can be matched, we have
|
|
// nothing to do with it.
|
|
if (F.isDeclaration())
|
|
continue;
|
|
|
|
StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
|
|
const auto *FS = getFlattenedSamplesFor(F);
|
|
if (FS)
|
|
continue;
|
|
|
|
// For extended binary, functions fully inlined may not be loaded in the
|
|
// top-level profile, so check the NameTable which has the all symbol names
|
|
// in profile.
|
|
if (NamesInProfile.count(CanonFName))
|
|
continue;
|
|
|
|
// For extended binary, non-profiled function symbols are in the profile
|
|
// symbol list table.
|
|
if (PSL && PSL->contains(CanonFName))
|
|
continue;
|
|
|
|
LLVM_DEBUG(dbgs() << "Function " << CanonFName
|
|
<< " is not in profile or profile symbol list.\n");
|
|
FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
|
|
}
|
|
}
|
|
|
|
bool SampleProfileMatcher::functionMatchesProfileHelper(
|
|
const Function &IRFunc, const FunctionId &ProfFunc) {
|
|
// The value is in the range [0, 1]. The bigger the value is, the more similar
|
|
// two sequences are.
|
|
float Similarity = 0.0;
|
|
|
|
// Match the functions if they have the same base name(after demangling) and
|
|
// skip the similarity check.
|
|
ItaniumPartialDemangler Demangler;
|
|
// Helper lambda to demangle and get the base name. If the demangling failed,
|
|
// return an empty string.
|
|
auto GetBaseName = [&](StringRef FName) {
|
|
auto FunctionName = FName.str();
|
|
if (Demangler.partialDemangle(FunctionName.c_str()))
|
|
return std::string();
|
|
size_t BaseNameSize = 0;
|
|
// The demangler API follows the __cxa_demangle one, and thus needs a
|
|
// pointer that originates from malloc (or nullptr) and the caller is
|
|
// responsible for free()-ing the buffer.
|
|
char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize);
|
|
std::string Result = (BaseNamePtr && BaseNameSize)
|
|
? std::string(BaseNamePtr, BaseNameSize)
|
|
: std::string();
|
|
free(BaseNamePtr);
|
|
return Result;
|
|
};
|
|
auto IRBaseName = GetBaseName(IRFunc.getName());
|
|
auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
|
|
if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
|
|
LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
|
|
<< ProfFunc << "(Profile) share the same base name: "
|
|
<< IRBaseName << ".\n");
|
|
return true;
|
|
}
|
|
|
|
const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
|
|
// With extbinary profile format, initial profile loading only reads profile
|
|
// based on current function names in the module.
|
|
// However, if a function is renamed, sample loader skips to load its original
|
|
// profile(which has a different name), we will miss this case. To address
|
|
// this, we load the top-level profile candidate explicitly for the matching.
|
|
if (!FSForMatching && LoadFuncProfileforCGMatching) {
|
|
DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
|
|
if (std::error_code EC = Reader.read(TopLevelFunc))
|
|
return false;
|
|
FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
|
|
LLVM_DEBUG({
|
|
if (FSForMatching)
|
|
dbgs() << "Read top-level function " << ProfFunc
|
|
<< " for call-graph matching\n";
|
|
});
|
|
}
|
|
if (!FSForMatching)
|
|
return false;
|
|
// The check for similarity or checksum may not be reliable if the function is
|
|
// tiny, we use the number of basic block as a proxy for the function
|
|
// complexity and skip the matching if it's too small.
|
|
if (IRFunc.size() < MinFuncCountForCGMatching ||
|
|
FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
|
|
return false;
|
|
|
|
// For probe-based function, we first trust the checksum info. If the checksum
|
|
// doesn't match, we continue checking for similarity.
|
|
if (FunctionSamples::ProfileIsProbeBased) {
|
|
const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
|
|
if (FuncDesc &&
|
|
!ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
|
|
LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
|
|
<< "(IR) and " << ProfFunc << "(Profile) match.\n");
|
|
|
|
return true;
|
|
}
|
|
}
|
|
|
|
AnchorMap IRAnchors;
|
|
findIRAnchors(IRFunc, IRAnchors);
|
|
AnchorMap ProfileAnchors;
|
|
findProfileAnchors(*FSForMatching, ProfileAnchors);
|
|
|
|
AnchorList FilteredIRAnchorsList;
|
|
AnchorList FilteredProfileAnchorList;
|
|
getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
|
|
FilteredProfileAnchorList);
|
|
|
|
// Similarly skip the matching if the num of anchors is not enough.
|
|
if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
|
|
FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
|
|
return false;
|
|
|
|
// Use the diff algorithm to find the LCS between IR and profile.
|
|
|
|
// Don't recursively match the callee function to avoid infinite matching,
|
|
// callee functions will be handled later since it's processed in top-down
|
|
// order .
|
|
LocToLocMap MatchedAnchors =
|
|
longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
|
|
false /* Match unused functions */);
|
|
|
|
Similarity = static_cast<float>(MatchedAnchors.size()) /
|
|
FilteredProfileAnchorList.size();
|
|
|
|
LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
|
|
<< "(IR) and " << ProfFunc << "(profile) is "
|
|
<< format("%.2f", Similarity) << "\n");
|
|
assert((Similarity >= 0 && Similarity <= 1.0) &&
|
|
"Similarity value should be in [0, 1]");
|
|
return Similarity * 100 > FuncProfileSimilarityThreshold;
|
|
}
|
|
|
|
// If FindMatchedProfileOnly is set to true, only use the processed function
|
|
// results. This is used for skipping the repeated recursive matching.
|
|
bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
|
|
const FunctionId &ProfFunc,
|
|
bool FindMatchedProfileOnly) {
|
|
auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
|
|
if (R != FuncProfileMatchCache.end())
|
|
return R->second;
|
|
|
|
if (FindMatchedProfileOnly)
|
|
return false;
|
|
|
|
bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
|
|
FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
|
|
if (Matched) {
|
|
FuncToProfileNameMap[&IRFunc] = ProfFunc;
|
|
LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
|
|
<< " matches profile:" << ProfFunc << "\n");
|
|
}
|
|
|
|
return Matched;
|
|
}
|
|
|
|
void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
|
|
DenseSet<StringRef> ProfileSalvagedFuncs;
|
|
// Update FuncNameToProfNameMap and SymbolMap.
|
|
for (auto &I : FuncToProfileNameMap) {
|
|
assert(I.first && "New function is null");
|
|
FunctionId FuncName(I.first->getName());
|
|
ProfileSalvagedFuncs.insert(I.second.stringRef());
|
|
FuncNameToProfNameMap->emplace(FuncName, I.second);
|
|
|
|
// We need to remove the old entry to avoid duplicating the function
|
|
// processing.
|
|
SymbolMap->erase(FuncName);
|
|
SymbolMap->emplace(I.second, I.first);
|
|
}
|
|
|
|
// With extbinary profile format, initial profile loading only reads profile
|
|
// based on current function names in the module, so we need to load top-level
|
|
// profiles for functions with different profile name explicitly after
|
|
// function-profile name map is established with stale profile matching.
|
|
Reader.read(ProfileSalvagedFuncs);
|
|
Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
|
|
}
|
|
|
|
void SampleProfileMatcher::runOnModule() {
|
|
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
|
|
FunctionSamples::ProfileIsCS);
|
|
if (SalvageUnusedProfile)
|
|
findFunctionsWithoutProfile();
|
|
|
|
// Process the matching in top-down order so that the caller matching result
|
|
// can be used to the callee matching.
|
|
std::vector<Function *> TopDownFunctionList;
|
|
TopDownFunctionList.reserve(M.size());
|
|
buildTopDownFuncOrder(CG, TopDownFunctionList);
|
|
for (auto *F : TopDownFunctionList) {
|
|
if (skipProfileForFunction(*F))
|
|
continue;
|
|
runOnFunction(*F);
|
|
}
|
|
|
|
if (SalvageUnusedProfile)
|
|
UpdateWithSalvagedProfiles();
|
|
|
|
if (SalvageStaleProfile)
|
|
distributeIRToProfileLocationMap();
|
|
|
|
computeAndReportProfileStaleness();
|
|
}
|
|
|
|
void SampleProfileMatcher::distributeIRToProfileLocationMap(
|
|
FunctionSamples &FS) {
|
|
const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
|
|
if (ProfileMappings != FuncMappings.end()) {
|
|
FS.setIRToProfileLocationMap(&(ProfileMappings->second));
|
|
}
|
|
|
|
for (auto &Callees :
|
|
const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
|
|
for (auto &FS : Callees.second) {
|
|
distributeIRToProfileLocationMap(FS.second);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Use a central place to distribute the matching results. Outlined and inlined
|
|
// profile with the function name will be set to the same pointer.
|
|
void SampleProfileMatcher::distributeIRToProfileLocationMap() {
|
|
for (auto &I : Reader.getProfiles()) {
|
|
distributeIRToProfileLocationMap(I.second);
|
|
}
|
|
}
|