[BOLT] Support profile density with basic samples (#137644)
For profiles with LBR samples, binary function profile density is computed as the ratio of executed bytes to function size in bytes. For profiles with IP (basic) samples, use the size of the basic block containing the sample IP, weighted by the sample count, as the numerator. Test Plan: updated perf_test.test
This commit is contained in:
parent
066bc49f76
commit
8f31c6dde7
@ -246,6 +246,9 @@ struct FuncSampleData {
|
|||||||
/// Get the number of samples recorded in [Start, End)
|
/// Get the number of samples recorded in [Start, End)
|
||||||
uint64_t getSamples(uint64_t Start, uint64_t End) const;
|
uint64_t getSamples(uint64_t Start, uint64_t End) const;
|
||||||
|
|
||||||
|
/// Returns the total number of samples recorded in this function.
|
||||||
|
uint64_t getSamples() const;
|
||||||
|
|
||||||
/// Aggregation helper
|
/// Aggregation helper
|
||||||
DenseMap<uint64_t, size_t> Index;
|
DenseMap<uint64_t, size_t> Index;
|
||||||
|
|
||||||
|
@ -565,15 +565,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
|
|||||||
processMemEvents();
|
processMemEvents();
|
||||||
|
|
||||||
// Mark all functions with registered events as having a valid profile.
|
// Mark all functions with registered events as having a valid profile.
|
||||||
const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
|
|
||||||
: BinaryFunction::PF_LBR;
|
|
||||||
for (auto &BFI : BC.getBinaryFunctions()) {
|
for (auto &BFI : BC.getBinaryFunctions()) {
|
||||||
BinaryFunction &BF = BFI.second;
|
BinaryFunction &BF = BFI.second;
|
||||||
FuncBranchData *FBD = getBranchData(BF);
|
if (FuncBranchData *FBD = getBranchData(BF)) {
|
||||||
if (FBD || getFuncSampleData(BF.getNames())) {
|
BF.markProfiled(BinaryFunction::PF_LBR);
|
||||||
BF.markProfiled(Flags);
|
BF.RawBranchCount = FBD->getNumExecutedBranches();
|
||||||
if (FBD)
|
} else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
|
||||||
BF.RawBranchCount = FBD->getNumExecutedBranches();
|
BF.markProfiled(BinaryFunction::PF_SAMPLE);
|
||||||
|
BF.RawBranchCount = FSD->getSamples();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -630,10 +629,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
|
|||||||
|
|
||||||
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
|
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
|
||||||
uint64_t Count) {
|
uint64_t Count) {
|
||||||
|
// To record executed bytes, use basic block size as is regardless of BAT.
|
||||||
|
uint64_t BlockSize = 0;
|
||||||
|
if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
|
||||||
|
Address - OrigFunc.getAddress()))
|
||||||
|
BlockSize = BB->getOriginalSize();
|
||||||
|
|
||||||
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
|
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
|
||||||
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
|
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
|
||||||
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
|
if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
|
||||||
NumColdSamples += Count;
|
NumColdSamples += Count;
|
||||||
|
// Attach executed bytes to parent function in case of cold fragment.
|
||||||
|
Func.SampleCountInBytes += Count * BlockSize;
|
||||||
|
|
||||||
auto I = NamesToSamples.find(Func.getOneName());
|
auto I = NamesToSamples.find(Func.getOneName());
|
||||||
if (I == NamesToSamples.end()) {
|
if (I == NamesToSamples.end()) {
|
||||||
|
@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
|
|||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t FuncSampleData::getSamples() const { // Total samples recorded in this function.
|
||||||
|
uint64_t Result = 0;
|
||||||
|
for (const SampleInfo &I : Data) // Walk every recorded sample entry.
|
||||||
|
Result += I.Hits; // Accumulate the hit count of each entry.
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
|
||||||
void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
|
void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
|
||||||
auto Iter = Index.find(Offset);
|
auto Iter = Index.find(Offset);
|
||||||
if (Iter == Index.end()) {
|
if (Iter == Index.end()) {
|
||||||
|
@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
|
|||||||
|
|
||||||
CHECK-NOT: PERF2BOLT-ERROR
|
CHECK-NOT: PERF2BOLT-ERROR
|
||||||
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
|
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
|
||||||
|
CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
|
||||||
|
|
||||||
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
|
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
|
||||||
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
|
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
|
||||||
|
Loading…
x
Reference in New Issue
Block a user