[BOLT] Support profile density with basic samples (#137644)
For profiles with LBR samples, binary function profile density is computed as the ratio of executed bytes to function size in bytes. For profiles with IP samples, the size of the basic block containing the sample IP is used as the numerator. Test Plan: updated perf_test.test
This commit is contained in:
parent
066bc49f76
commit
8f31c6dde7
@ -246,6 +246,9 @@ struct FuncSampleData {
|
||||
/// Get the number of samples recorded in [Start, End)
|
||||
uint64_t getSamples(uint64_t Start, uint64_t End) const;
|
||||
|
||||
/// Returns the total number of samples recorded in this function.
|
||||
uint64_t getSamples() const;
|
||||
|
||||
/// Aggregation helper
|
||||
DenseMap<uint64_t, size_t> Index;
|
||||
|
||||
|
@ -565,15 +565,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
|
||||
processMemEvents();
|
||||
|
||||
// Mark all functions with registered events as having a valid profile.
|
||||
const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
|
||||
: BinaryFunction::PF_LBR;
|
||||
for (auto &BFI : BC.getBinaryFunctions()) {
|
||||
BinaryFunction &BF = BFI.second;
|
||||
FuncBranchData *FBD = getBranchData(BF);
|
||||
if (FBD || getFuncSampleData(BF.getNames())) {
|
||||
BF.markProfiled(Flags);
|
||||
if (FBD)
|
||||
BF.RawBranchCount = FBD->getNumExecutedBranches();
|
||||
if (FuncBranchData *FBD = getBranchData(BF)) {
|
||||
BF.markProfiled(BinaryFunction::PF_LBR);
|
||||
BF.RawBranchCount = FBD->getNumExecutedBranches();
|
||||
} else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
|
||||
BF.markProfiled(BinaryFunction::PF_SAMPLE);
|
||||
BF.RawBranchCount = FSD->getSamples();
|
||||
}
|
||||
}
|
||||
|
||||
@ -630,10 +629,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
|
||||
|
||||
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
|
||||
uint64_t Count) {
|
||||
// To record executed bytes, use basic block size as is regardless of BAT.
|
||||
uint64_t BlockSize = 0;
|
||||
if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
|
||||
Address - OrigFunc.getAddress()))
|
||||
BlockSize = BB->getOriginalSize();
|
||||
|
||||
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
|
||||
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
|
||||
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
|
||||
if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
|
||||
NumColdSamples += Count;
|
||||
// Attach executed bytes to parent function in case of cold fragment.
|
||||
Func.SampleCountInBytes += Count * BlockSize;
|
||||
|
||||
auto I = NamesToSamples.find(Func.getOneName());
|
||||
if (I == NamesToSamples.end()) {
|
||||
|
@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
|
||||
return Result;
|
||||
}
|
||||
|
||||
uint64_t FuncSampleData::getSamples() const {
|
||||
uint64_t Result = 0;
|
||||
for (const SampleInfo &I : Data)
|
||||
Result += I.Hits;
|
||||
return Result;
|
||||
}
|
||||
|
||||
void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
|
||||
auto Iter = Index.find(Offset);
|
||||
if (Iter == Index.end()) {
|
||||
|
@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
|
||||
|
||||
CHECK-NOT: PERF2BOLT-ERROR
|
||||
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
|
||||
CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
|
||||
|
||||
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
|
||||
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
|
||||
|
Loading…
x
Reference in New Issue
Block a user