[BOLT] Support buildid in pre-aggregated profile

Sample addresses belonging to external DSOs (buildid doesn't match the
current file) are treated as external (0).

Buildid for the main binary is expected to be omitted.

Test Plan: added pre-aggregated-perf-buildid.test

Reviewers:
paschalis-mpeis, maksfb, yavtuk, ayermolo, yozhu, rafaelauler, yota9

Reviewed By: paschalis-mpeis

Pull Request: https://github.com/llvm/llvm-project/pull/186931
This commit is contained in:
Amir Ayupov 2026-03-14 20:38:37 -07:00
parent 31b17c4789
commit fce6895804
2 changed files with 105 additions and 3 deletions

View File

@ -146,6 +146,14 @@ private:
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;
/// Filter pre-aggregated entries belonging to a DSO with this buildid.
/// Set when processing a shared library, empty implies main binary.
/// Pre-aggregated locations whose buildid differs from this value have their
/// offset reset to the external marker while parsing.
/// NOTE(review): StringRef is a non-owning view; the buildid string it was
/// assigned from must outlive this aggregator - confirm at the assignment.
StringRef FilterBuildID;
/// Per-DSO samples breakdown: buildid -> sample count.
/// Keyed by the buildid of the first address of each pre-aggregated entry;
/// the empty key stands for the main binary.
StringMap<uint64_t> DSOSamples;
/// Cross-DSO branch counts: (FromDSO -> ToDSO) -> branch count.
/// An entry is recorded only when both addresses are present and their
/// buildids differ.
/// NOTE(review): the keys are non-owning StringRefs, presumably pointing into
/// the parsed profile buffer - confirm that buffer outlives this map.
/// NOTE(review): the standard library provides no std::hash for std::pair;
/// confirm a suitable hash specialization is in scope for this key type.
std::unordered_map<std::pair<StringRef, StringRef>, uint64_t> CrossDSOSamples;
template <typename T> void clear(T &Container) {
T TempContainer;
TempContainer.swap(Container);
@ -579,6 +587,7 @@ private:
void printBranchSamplesDiagnostics() const;
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
void printDSODiagnostics() const;
/// Get instruction at \p Addr either from containing binary function or
/// disassemble in-place, and invoke \p Callback on resulting MCInst.

View File

@ -392,6 +392,20 @@ void DataAggregator::parsePreAggregated() {
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
// When processing a shared object, filter pre-aggregated entries by buildid.
if (BC && !BC->HasFixedLoadAddress &&
BC->getFilename().ends_with(".so")) {
if (auto FileBID = BC->getFileBuildID()) {
FilterBuildID = *FileBID;
outs() << "PERF2BOLT: filtering pre-aggregated data for buildid "
<< *FileBID << "\n";
} else {
errs() << "PERF2BOLT-WARNING: cannot read buildid from input binary, "
"won't filter pre-aggregated data\n";
}
}
if (parsePreAggregatedLBRSamples()) {
errs() << "PERF2BOLT: failed to parse samples\n";
exit(1);
@ -735,6 +749,8 @@ void DataAggregator::processProfile(BinaryContext &BC) {
else
processBranchEvents();
printDSODiagnostics();
processMemEvents();
// Mark all functions with registered events as having a valid profile.
@ -1438,14 +1454,27 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
return std::error_code();
}
int64_t Count = Counters[0];
int64_t Mispreds = Counters[1];
if (Addr[0]) {
// Per-DSO sample count
DSOSamples[Addr[0]->Name] += Count;
// Cross-DSO branch count
if (Addr[1] && Addr[1]->Name != Addr[0]->Name)
CrossDSOSamples[{Addr[0]->Name, Addr[1]->Name}] += Count;
}
// Reset external addresses.
for (std::optional<Location> &Loc : Addr)
if (Loc && Loc->Name != FilterBuildID)
Loc->Offset = Trace::EXTERNAL;
const uint64_t FromOffset = Addr[0]->Offset;
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
if (FromFunc)
FromFunc->setHasProfileAvailable();
int64_t Count = Counters[0];
int64_t Mispreds = Counters[1];
/// Record basic IP sample into \p BasicSamples and return.
if (Type == SAMPLE) {
BasicSamples[FromOffset] += Count;
@ -1662,6 +1691,70 @@ void DataAggregator::printBranchStacksDiagnostics(
"were attributed to the input binary\n";
}
void DataAggregator::printDSODiagnostics() const {
// No buildids (except main binary).
if (DSOSamples.size() == 1)
return;
// Main binary: show DSOs covering 95th percentile of sample count.
if (FilterBuildID.empty()) {
// Sort DSOs by sample count.
std::vector<std::pair<std::string, uint64_t>> DSOs;
DSOs.reserve(DSOSamples.size());
for (const auto &[DSO, Count] : DSOSamples)
DSOs.emplace_back(DSO, Count);
llvm::sort(DSOs, llvm::less_second());
outs() << "PERF2BOLT: DSOs covering 95th percentile by samples:\n";
uint64_t CumulativeCount = 0;
for (auto &[DSO, Count] : llvm::reverse(DSOs)) {
CumulativeCount += Count;
if (DSO.empty())
DSO = "(binary)";
outs() << "\t" << DSO << ": " << Count << " samples, "
<< format("%.1f%%", Count * 100.0f / NumTotalSamples)
<< " of total, "
<< format("%.1f%%", CumulativeCount * 100.0f / NumTotalSamples)
<< " cumulative\n";
if (CumulativeCount * 100 >= NumTotalSamples * 95)
break;
}
}
// Cross-DSO branches: show DSO pairs covering 95th percentile of branch count.
if (CrossDSOSamples.size() > 1) {
// Sort DSO pairs by branch count.
std::vector<std::pair<std::pair<std::string, std::string>, uint64_t>>
XDSOs;
XDSOs.reserve(CrossDSOSamples.size());
// For main binary, include all DSOs. For FilterBuildID case, only include
// branches belonging to that DSO.
uint64_t NumTotalBranches = 0;
for (const auto &[FromTo, Count] : CrossDSOSamples) {
bool ShouldInclude = FilterBuildID.empty() || FromTo.first == FilterBuildID;
if (!ShouldInclude)
continue;
NumTotalBranches += Count;
XDSOs.emplace_back(FromTo, Count);
}
llvm::sort(XDSOs, llvm::less_second());
outs() << "PERF2BOLT: DSO pairs covering 95th percentile of branches:\n";
uint64_t CumulativeCount = 0;
for (auto &[FromTo, Count] : llvm::reverse(XDSOs)) {
CumulativeCount += Count;
outs() << "\t" << FromTo.first << " -> " << FromTo.second << ": " << Count
<< " branches, "
<< format("%.1f%%", Count * 100.0f / NumTotalBranches)
<< " of total, "
<< format("%.1f%%", CumulativeCount * 100.0f / NumTotalBranches)
<< " cumulative\n";
if (CumulativeCount * 100 >= NumTotalBranches * 95)
break;
}
}
}
std::error_code DataAggregator::parseBranchEvents() {
std::string BranchEventTypeStr =
opts::ArmSPE ? "SPE branch events in brstack-format" : "branch events";