diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6ff1a66e76c6..f6374eadf019 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -146,6 +146,14 @@ private:
   std::unordered_map<uint64_t, uint64_t> BasicSamples;
   std::vector<PerfMemSample> MemSamples;
 
+  /// Filter pre-aggregated entries belonging to a DSO with this buildid.
+  /// Set when processing a shared library, empty implies main binary.
+  StringRef FilterBuildID;
+  /// Per-DSO samples breakdown: buildid -> sample count.
+  StringMap<uint64_t> DSOSamples;
+  /// Cross-DSO branch counts: (FromDSO -> ToDSO) -> branch count.
+  std::unordered_map<std::pair<StringRef, StringRef>, uint64_t> CrossDSOSamples;
+
   template <typename T> void clear(T &Container) {
     T TempContainer;
     TempContainer.swap(Container);
@@ -579,6 +587,7 @@ private:
   void printBranchSamplesDiagnostics() const;
   void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
   void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
+  void printDSODiagnostics() const;
 
   /// Get instruction at \p Addr either from containing binary function or
   /// disassemble in-place, and invoke \p Callback on resulting MCInst.
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index c770656dbadf..abf9ec4f5aa9 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -392,6 +392,22 @@ void DataAggregator::parsePreAggregated() {
   ParsingBuf = FileBuf->getBuffer();
   Col = 0;
   Line = 1;
+
+  // When processing a shared object, filter pre-aggregated entries by buildid.
+  // NOTE(review): the ".so" suffix test skips versioned library names such as
+  // "libfoo.so.1"; confirm that is intended.
+  if (BC && !BC->HasFixedLoadAddress &&
+      BC->getFilename().ends_with(".so")) {
+    if (auto FileBID = BC->getFileBuildID()) {
+      FilterBuildID = *FileBID;
+      outs() << "PERF2BOLT: filtering pre-aggregated data for buildid "
+             << *FileBID << "\n";
+    } else {
+      errs() << "PERF2BOLT-WARNING: cannot read buildid from input binary, "
+                "won't filter pre-aggregated data\n";
+    }
+  }
+
   if (parsePreAggregatedLBRSamples()) {
     errs() << "PERF2BOLT: failed to parse samples\n";
     exit(1);
@@ -735,6 +751,8 @@ void DataAggregator::processProfile(BinaryContext &BC) {
   else
     processBranchEvents();
 
+  printDSODiagnostics();
+
   processMemEvents();
 
   // Mark all functions with registered events as having a valid profile.
@@ -1438,14 +1456,27 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
     return std::error_code();
   }
 
+  int64_t Count = Counters[0];
+  int64_t Mispreds = Counters[1];
+
+  if (Addr[0]) {
+    // Per-DSO sample count, attributed to the buildid of the first address.
+    DSOSamples[Addr[0]->Name] += Count;
+    // Cross-DSO branch count: both addresses present and buildids differ.
+    if (Addr[1] && Addr[1]->Name != Addr[0]->Name)
+      CrossDSOSamples[{Addr[0]->Name, Addr[1]->Name}] += Count;
+  }
+
+  // Mark addresses whose buildid differs from FilterBuildID as external.
+  for (auto &Loc : Addr)
+    if (Loc && Loc->Name != FilterBuildID)
+      Loc->Offset = Trace::EXTERNAL;
+
   const uint64_t FromOffset = Addr[0]->Offset;
   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
   if (FromFunc)
     FromFunc->setHasProfileAvailable();
 
-  int64_t Count = Counters[0];
-  int64_t Mispreds = Counters[1];
-
   /// Record basic IP sample into \p BasicSamples and return.
   if (Type == SAMPLE) {
     BasicSamples[FromOffset] += Count;
@@ -1662,6 +1693,86 @@ void DataAggregator::printBranchStacksDiagnostics(
               "were attributed to the input binary\n";
 }
 
+/// Report how recorded samples are distributed across DSOs (keyed by
+/// buildid) and which DSO pairs account for cross-DSO branches. In each
+/// report, entries are listed from the highest count down until 95% of the
+/// total is covered. Prints nothing unless DSOSamples holds at least two
+/// buildids.
+void DataAggregator::printDSODiagnostics() const {
+  // Nothing to break down if no per-DSO counts were recorded or if they all
+  // share a single buildid.
+  if (DSOSamples.size() <= 1)
+    return;
+
+  // Order entries by descending count. Ties are broken by key so the output
+  // does not depend on the iteration order of the hash-based containers.
+  auto HeaviestFirst = [](const auto &LHS, const auto &RHS) {
+    if (LHS.second != RHS.second)
+      return LHS.second > RHS.second;
+    return LHS.first < RHS.first;
+  };
+
+  // Main binary: show DSOs covering 95th percentile of sample count.
+  if (FilterBuildID.empty()) {
+    std::vector<std::pair<StringRef, uint64_t>> DSOs;
+    DSOs.reserve(DSOSamples.size());
+    for (const auto &[DSO, Count] : DSOSamples)
+      DSOs.emplace_back(DSO, Count);
+    llvm::sort(DSOs, HeaviestFirst);
+
+    outs() << "PERF2BOLT: DSOs covering 95th percentile by samples:\n";
+    uint64_t CumulativeCount = 0;
+    for (auto &[DSO, Count] : DSOs) {
+      CumulativeCount += Count;
+      // An empty buildid denotes the main binary.
+      if (DSO.empty())
+        DSO = "(binary)";
+      outs() << "\t" << DSO << ": " << Count << " samples, "
+             << format("%.1f%%", Count * 100.0f / NumTotalSamples)
+             << " of total, "
+             << format("%.1f%%", CumulativeCount * 100.0f / NumTotalSamples)
+             << " cumulative\n";
+      if (CumulativeCount * 100 >= NumTotalSamples * 95)
+        break;
+    }
+  }
+
+  // Cross-DSO branches: show DSO pairs covering 95th percentile of branch
+  // count.
+  if (CrossDSOSamples.size() > 1) {
+    std::vector<std::pair<std::pair<StringRef, StringRef>, uint64_t>> XDSOs;
+    XDSOs.reserve(CrossDSOSamples.size());
+    // For main binary, include all DSOs. For FilterBuildID case, only include
+    // branches whose source DSO matches it.
+    uint64_t NumTotalBranches = 0;
+    for (const auto &[FromTo, Count] : CrossDSOSamples) {
+      if (!FilterBuildID.empty() && FromTo.first != FilterBuildID)
+        continue;
+      NumTotalBranches += Count;
+      XDSOs.emplace_back(FromTo, Count);
+    }
+    // The filter may have rejected every pair; skip the report rather than
+    // print a bare header or divide by a zero total.
+    if (XDSOs.empty() || NumTotalBranches == 0)
+      return;
+    llvm::sort(XDSOs, HeaviestFirst);
+
+    outs() << "PERF2BOLT: DSO pairs covering 95th percentile of branches:\n";
+    uint64_t CumulativeCount = 0;
+    for (const auto &[FromTo, Count] : XDSOs) {
+      CumulativeCount += Count;
+      outs() << "\t" << FromTo.first << " -> " << FromTo.second << ": " << Count
+             << " branches, "
+             << format("%.1f%%", Count * 100.0f / NumTotalBranches)
+             << " of total, "
+             << format("%.1f%%", CumulativeCount * 100.0f / NumTotalBranches)
+             << " cumulative\n";
+      if (CumulativeCount * 100 >= NumTotalBranches * 95)
+        break;
+    }
+  }
+}
+
 std::error_code DataAggregator::parseBranchEvents() {
   std::string BranchEventTypeStr =
       opts::ArmSPE ? "SPE branch events in brstack-format" : "branch events";