[BOLT] Support profile density with basic samples (#137644)

For profiles with LBR samples, binary function profile density is
computed as the ratio of executed bytes to function size in bytes.

For profiles with IP samples, use the size of the basic block containing
the sample IP as the numerator.

Test Plan: updated perf_test.test
This commit is contained in:
Amir Ayupov 2025-05-10 21:01:49 -07:00 committed by GitHub
parent 066bc49f76
commit 8f31c6dde7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 26 additions and 8 deletions

View File

@ -246,6 +246,9 @@ struct FuncSampleData {
/// Get the number of samples recorded in [Start, End) /// Get the number of samples recorded in [Start, End)
uint64_t getSamples(uint64_t Start, uint64_t End) const; uint64_t getSamples(uint64_t Start, uint64_t End) const;
/// Returns the total number of samples recorded in this function.
uint64_t getSamples() const;
/// Aggregation helper /// Aggregation helper
DenseMap<uint64_t, size_t> Index; DenseMap<uint64_t, size_t> Index;

View File

@ -565,15 +565,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
processMemEvents(); processMemEvents();
// Mark all functions with registered events as having a valid profile. // Mark all functions with registered events as having a valid profile.
const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) { for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second; BinaryFunction &BF = BFI.second;
FuncBranchData *FBD = getBranchData(BF); if (FuncBranchData *FBD = getBranchData(BF)) {
if (FBD || getFuncSampleData(BF.getNames())) { BF.markProfiled(BinaryFunction::PF_LBR);
BF.markProfiled(Flags); BF.RawBranchCount = FBD->getNumExecutedBranches();
if (FBD) } else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
BF.RawBranchCount = FBD->getNumExecutedBranches(); BF.markProfiled(BinaryFunction::PF_SAMPLE);
BF.RawBranchCount = FSD->getSamples();
} }
} }
@ -630,10 +629,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address, bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) { uint64_t Count) {
// To record executed bytes, use basic block size as is regardless of BAT.
uint64_t BlockSize = 0;
if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
Address - OrigFunc.getAddress()))
BlockSize = BB->getOriginalSize();
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc); BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc; BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress()))) if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
NumColdSamples += Count; NumColdSamples += Count;
// Attach executed bytes to parent function in case of cold fragment.
Func.SampleCountInBytes += Count * BlockSize;
auto I = NamesToSamples.find(Func.getOneName()); auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) { if (I == NamesToSamples.end()) {

View File

@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
return Result; return Result;
} }
/// Adds up the Hits field of every entry stored in Data and returns the sum.
uint64_t FuncSampleData::getSamples() const {
  uint64_t TotalHits = 0;
  for (const SampleInfo &Sample : Data)
    TotalHits += Sample.Hits;
  return TotalHits;
}
void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) { void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
auto Iter = Index.find(Offset); auto Iter = Index.find(Offset);
if (Iter == Index.end()) { if (Iter == Index.end()) {

View File

@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
CHECK-NOT: PERF2BOLT-ERROR CHECK-NOT: PERF2BOLT-ERROR
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4 RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4 RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4