//===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
//
//===----------------------------------------------------------------------===//

#include "bolt/Profile/DataAggregator.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/BinaryPasses.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/Heatmap.h"
#include "bolt/Profile/YAMLProfileWriter.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
// NOTE(review): the four directives below carry no header name in this copy of
// the file; the intended headers must be restored before this can build.
#include
#include
#include
#include

#define DEBUG_TYPE "aggregator"

using namespace llvm;
using namespace bolt;

// Command-line options controlling aggregation.
// NOTE(review): every cl::opt below is spelled without a template argument
// list in this copy, so the value type of each option is not visible here.
namespace opts {

// -nl: aggregate plain samples instead of LBR branch records.
static cl::opt
    BasicAggregation("nl",
                     cl::desc("aggregate basic samples (without LBR info)"),
                     cl::cat(AggregatorCategory));

// -spe: Arm SPE mode; start() overrides several other options when it is set.
cl::opt ArmSPE("spe", cl::desc("Enable Arm SPE mode."),
               cl::cat(AggregatorCategory));

// -itrace: value forwarded to `perf script --itrace=<value>` by start().
static cl::opt
    ITraceAggregation("itrace",
                      cl::desc("Generate LBR info with perf itrace argument"),
                      cl::cat(AggregatorCategory));

static cl::opt FilterMemProfile(
    "filter-mem-profile",
    cl::desc("if processing a memory profile, filter out stack or heap accesses "
             "that won't be useful for BOLT to reduce profile file size"),
    cl::init(true), cl::cat(AggregatorCategory));

static cl::opt ParseMemProfile(
    "parse-mem-profile",
    cl::desc("enable memory profile parsing if it's present in the input data, "
             "on by default unless `--itrace` is set."),
    cl::init(true), cl::cat(AggregatorCategory));

// -pid: 0 (the default) means "do not filter"; see filterBinaryMMapInfo().
static cl::opt
    FilterPID("pid",
              cl::desc("only use samples from process with specified PID"),
              cl::init(0), cl::Optional, cl::cat(AggregatorCategory));

static cl::opt IgnoreBuildID(
    "ignore-build-id",
    cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
    cl::init(false), cl::cat(AggregatorCategory));

// preprocessProfile() forces this option off for Linux kernel binaries.
static cl::opt IgnoreInterruptLBR(
    "ignore-interrupt-lbr",
    cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
    cl::init(true), cl::cat(AggregatorCategory));

static cl::opt
    MaxSamples("max-samples", cl::init(-1ULL),
               cl::desc("maximum number of samples to read from LBR profile"),
               cl::Optional, cl::Hidden, cl::cat(AggregatorCategory));

// Options defined in other translation units.
extern cl::opt ProfileFormat;
extern cl::opt ProfileWritePseudoProbes;
extern cl::opt SaveProfile;

// -pa: input is already aggregated, so no perf process is launched.
cl::opt ReadPreAggregated(
    "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
    cl::cat(AggregatorCategory));

// -perf-script-events: textual perf-script output used directly as the parse
// buffer by prepareToParse().
cl::opt
    ReadPerfEvents("perf-script-events",
                   cl::desc("skip perf event collection by supplying a "
                            "perf-script output in a textual format"),
                   cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory));

static cl::opt
    TimeAggregator("time-aggr", cl::desc("time BOLT aggregator"),
                   cl::init(false), cl::ZeroOrMore, cl::cat(AggregatorCategory));

} // namespace opts

namespace {

// Timer group identifier and description used by this file.
const char TimerGroupName[] = "aggregator";
const char TimerGroupDesc[] = "Aggregator";

/// Collect name, start address and end address of every non-empty text section
/// in \p BC, sorted by start address.
/// NOTE(review): the element type of the returned vector is missing in this
/// copy; the sort lambda suggests SectionNameAndRange - confirm.
std::vector getTextSections(const BinaryContext *BC) {
  std::vector sections;
  for (BinarySection &Section : BC->sections()) {
    if (!Section.isText())
      continue;
    if (Section.getSize() == 0)
      continue;
    sections.push_back(
        {Section.getName(), Section.getAddress(), Section.getEndAddress()});
  }
  // The comparator's second parameter and body follow on the next source line.
  llvm::sort(sections, [](const SectionNameAndRange &A, const
// Continuation of getTextSections(): remainder of the sort comparator, which
// orders sections by ascending BeginAddress.
                                           SectionNameAndRange &B) {
    return A.BeginAddress < B.BeginAddress;
  });
  return sections;
}
} // namespace

// Out-of-class definition of the static constexpr data member.
constexpr uint64_t DataAggregator::KernelBaseAddr;

/// Removes every temporary file still registered in TempFiles.
DataAggregator::~DataAggregator() { deleteTempFiles(); }

namespace {
/// Delete \p FileName. A failure is reported on errs() and otherwise ignored.
void deleteTempFile(const std::string &FileName) {
  if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
    errs() << "PERF2BOLT: failed to delete temporary file " << FileName
           << " with error " << Errc.message() << "\n";
}
} // namespace

/// Delete all files recorded in TempFiles and empty the list, so a second call
/// has nothing left to remove.
void DataAggregator::deleteTempFiles() {
  for (std::string &FileName : TempFiles)
    deleteTempFile(FileName);
  TempFiles.clear();
}

/// Look up "perf" through the PATH environment variable and store the result in
/// PerfPath. Exits the process with status 1 when no executable is found.
void DataAggregator::findPerfExecutable() {
  std::optional PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf");
  if (!PerfExecutable) {
    outs() << "PERF2BOLT: No perf executable found!\n";
    exit(1);
  }
  PerfPath = *PerfExecutable;
}

/// Begin aggregation for Filename: adjust options and launch the perf-script
/// jobs without waiting for them. Returns before launching anything when the
/// input is pre-aggregated or perf-script text was supplied by the user.
void DataAggregator::start() {
  outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";

  // Turn on heatmap building if requested by --heatmap flag.
  if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
    opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;

  // Don't launch perf for pre-aggregated files or when perf input is specified
  // by the user.
  if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
    return;

  findPerfExecutable();

  if (opts::ArmSPE) {
    // pid from_ip to_ip flags
    // where flags could be:
    // P/M: whether branch was Predicted or Mispredicted.
    // N: optionally appears when the branch was Not-Taken (ie fall-through)
    // 12345 0x123/0x456/PN/-/-/8/RET/-
    opts::ITraceAggregation = "bl";
    opts::ParseMemProfile = true;
    opts::BasicAggregation = false;
  }

  // Exactly one "main events" job is launched, chosen by aggregation mode.
  if (opts::BasicAggregation) {
    launchPerfProcess("events without LBR", MainEventsPPI,
                      "script -F pid,event,ip",
                      /*Wait = */ false);
  } else if (!opts::ITraceAggregation.empty()) {
    // Disable parsing memory profile from trace data, unless requested by user.
// Continuation of DataAggregator::start(): body of the itrace branch.
    if (!opts::ParseMemProfile.getNumOccurrences())
      opts::ParseMemProfile = false;
    std::string ItracePerfScriptArgs = llvm::formatv(
        "script -F pid,brstack --itrace={0}", opts::ITraceAggregation);
    launchPerfProcess("branch events with itrace", MainEventsPPI,
                      ItracePerfScriptArgs.c_str(),
                      /*Wait = */ false);
  } else {
    launchPerfProcess("branch events", MainEventsPPI, "script -F pid,brstack",
                      /*Wait = */ false);
  }

  if (opts::ParseMemProfile)
    launchPerfProcess("mem events", MemEventsPPI,
                      "script -F pid,event,addr,ip",
                      /*Wait = */ false);

  launchPerfProcess("process events", MMapEventsPPI,
                    "script --show-mmap-events --no-itrace",
                    /*Wait = */ false);

  launchPerfProcess("task events", TaskEventsPPI,
                    "script --show-task-events --no-itrace",
                    /*Wait = */ false);
}

/// Wait on the launched perf jobs with a one-second limit each, remove the
/// temporary files and exit with status 1. Returns without doing anything in
/// pre-aggregated mode.
/// NOTE(review): start() also skips launching perf when -perf-script-events is
/// given, but that case is not excluded here - confirm sys::Wait is safe on
/// process handles that were never started.
void DataAggregator::abort() {
  if (opts::ReadPreAggregated)
    return;

  std::string Error;

  // Kill subprocesses in case they are not finished
  sys::Wait(TaskEventsPPI.PI, 1, &Error);
  sys::Wait(MMapEventsPPI.PI, 1, &Error);
  sys::Wait(MainEventsPPI.PI, 1, &Error);
  // The mem-events job is only launched when memory profile parsing is on.
  if (opts::ParseMemProfile)
    sys::Wait(MemEventsPPI.PI, 1, &Error);

  deleteTempFiles();

  exit(1);
}

/// Spawn `perf <ArgsString> -f -i <Filename>` with stdout and stderr redirected
/// to freshly created temporary files whose paths are stored in \p PPI.
/// \p ArgsString is split on single spaces, so an individual argument cannot
/// itself contain a space. When \p Wait is true the call blocks and only the
/// return code is recorded; otherwise the process handle is stored in \p PPI.
/// Exits with status 1 if a temporary file cannot be created.
void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
                                       const char *ArgsString, bool Wait) {
  SmallVector Argv;

  outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
  Argv.push_back(PerfPath.data());

  StringRef(ArgsString).split(Argv, ' ');
  Argv.push_back("-f");
  Argv.push_back("-i");
  Argv.push_back(Filename.c_str());

  if (std::error_code Errc =
          sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
    errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
           << " with error " << Errc.message() << "\n";
    exit(1);
  }
  // Registered so that deleteTempFiles() removes it later.
  TempFiles.push_back(PPI.StdoutPath.data());

  if (std::error_code Errc =
          sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
    errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
           << " with error " << Errc.message() << "\n";
    exit(1);
  }
TempFiles.push_back(PPI.StderrPath.data()); std::optional Redirects[] = { std::nullopt, // Stdin StringRef(PPI.StdoutPath.data()), // Stdout StringRef(PPI.StderrPath.data())}; // Stderr LLVM_DEBUG({ dbgs() << "Launching perf: "; for (StringRef Arg : Argv) dbgs() << Arg << " "; dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() << "\n"; }); if (Wait) PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, /*envp*/ std::nullopt, Redirects); else PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ std::nullopt, Redirects); } void DataAggregator::processFileBuildID(StringRef FileBuildID) { PerfProcessInfo BuildIDProcessInfo; launchPerfProcess("buildid list", BuildIDProcessInfo, "buildid-list", /*Wait = */true); if (BuildIDProcessInfo.PI.ReturnCode != 0) { ErrorOr> MB = MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data()); StringRef ErrBuf = (*MB)->getBuffer(); errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode << '\n'; errs() << ErrBuf; return; } ErrorOr> MB = MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data()); if (std::error_code EC = MB.getError()) { errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " << EC.message() << "\n"; return; } FileBuf = std::move(*MB); ParsingBuf = FileBuf->getBuffer(); std::optional FileName = getFileNameForBuildID(FileBuildID); if (!FileName) { if (hasAllBuildIDs()) { errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. " "This indicates the input binary supplied for data aggregation " "is not the same recorded by perf when collecting profiling " "data, or there were no samples recorded for the binary. 
" "Use -ignore-build-id option to override.\n"; if (!opts::IgnoreBuildID) abort(); } else { errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " "data was recorded without it\n"; return; } } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) { errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; BuildIDBinaryName = std::string(*FileName); } else { outs() << "PERF2BOLT: matched build-id and file name\n"; } } bool DataAggregator::checkPerfDataMagic(StringRef FileName) { if (opts::ReadPreAggregated) return true; Expected FD = sys::fs::openNativeFileForRead(FileName); if (!FD) { consumeError(FD.takeError()); return false; } char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); }); Expected BytesRead = sys::fs::readNativeFileSlice( *FD, MutableArrayRef(Buf, sizeof(Buf)), 0); if (!BytesRead) { consumeError(BytesRead.takeError()); return false; } if (*BytesRead != 7) return false; if (strncmp(Buf, "PERFILE", 7) == 0) return true; return false; } void DataAggregator::parsePreAggregated() { ErrorOr> MB = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = MB.getError()) { errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " << EC.message() << "\n"; exit(1); } FileBuf = std::move(*MB); ParsingBuf = FileBuf->getBuffer(); Col = 0; Line = 1; if (parsePreAggregatedLBRSamples()) { errs() << "PERF2BOLT: failed to parse samples\n"; exit(1); } } void DataAggregator::filterBinaryMMapInfo() { if (opts::FilterPID) { auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); if (MMapInfoIter != BinaryMMapInfo.end()) { MMapInfo MMap = MMapInfoIter->second; BinaryMMapInfo.clear(); BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap)); } else { if (errs().has_colors()) errs().changeColor(raw_ostream::RED); errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" << opts::FilterPID << "\"" << " for binary \"" << BC->getFilename() << "\"."; 
assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); errs() << " Profile for the following process is available:\n"; for (std::pair &MMI : BinaryMMapInfo) outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)\n" : "\n"); if (errs().has_colors()) errs().resetColor(); exit(1); } } } int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, PerfProcessErrorCallbackTy Callback) { if (!opts::ReadPerfEvents.empty()) { outs() << "PERF2BOLT: using pre-processed perf events for '" << Name << "' (perf-script-events)\n"; ParsingBuf = opts::ReadPerfEvents; return 0; } std::string Error; outs() << "PERF2BOLT: waiting for perf " << Name << " collection to finish...\n"; sys::ProcessInfo PI = sys::Wait(Process.PI, std::nullopt, &Error); if (!Error.empty()) { errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; deleteTempFiles(); exit(1); } if (PI.ReturnCode != 0) { ErrorOr> ErrorMB = MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data()); StringRef ErrBuf = (*ErrorMB)->getBuffer(); deleteTempFiles(); Callback(PI.ReturnCode, ErrBuf); return PI.ReturnCode; } ErrorOr> MB = MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data()); if (std::error_code EC = MB.getError()) { errs() << "Cannot open " << Process.StdoutPath.data() << ": " << EC.message() << "\n"; deleteTempFiles(); exit(1); } FileBuf = std::move(*MB); ParsingBuf = FileBuf->getBuffer(); Col = 0; Line = 1; return PI.ReturnCode; } Error DataAggregator::preprocessProfile(BinaryContext &BC) { this->BC = &BC; auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) { errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf; exit(1); }; auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) { Regex NoData("Samples for '.*' event do not have ADDR attribute set. 
" "Cannot print 'addr' field."); if (!NoData.match(ErrBuf)) ErrorCallback(ReturnCode, ErrBuf); }; if (opts::ReadPreAggregated) { parsePreAggregated(); goto heatmap; } if (std::optional FileBuildID = BC.getFileBuildID()) { outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; processFileBuildID(*FileBuildID); } else { errs() << "BOLT-WARNING: build-id will not be checked because we could " "not read one from input binary\n"; } if (BC.IsLinuxKernel) { // Current MMap parsing logic does not work with linux kernel. // MMap entries for linux kernel uses PERF_RECORD_MMAP // format instead of typical PERF_RECORD_MMAP2 format. // Since linux kernel address mapping is absolute (same as // in the ELF file), we avoid parsing MMap in linux kernel mode. // While generating optimized linux kernel binary, we may need // to parse MMap entries. // In linux kernel mode, we analyze and optimize // all linux kernel binary instructions, irrespective // of whether they are due to system calls or due to // interrupts. Therefore, we cannot ignore interrupt // in Linux kernel mode. opts::IgnoreInterruptLBR = false; } else { prepareToParse("mmap events", MMapEventsPPI, ErrorCallback); if (parseMMapEvents()) errs() << "PERF2BOLT: failed to parse mmap events\n"; } prepareToParse("task events", TaskEventsPPI, ErrorCallback); if (parseTaskEvents()) errs() << "PERF2BOLT: failed to parse task events\n"; filterBinaryMMapInfo(); prepareToParse("events", MainEventsPPI, ErrorCallback); if ((!opts::BasicAggregation && parseBranchEvents()) || (opts::BasicAggregation && parseBasicEvents())) errs() << "PERF2BOLT: failed to parse samples\n"; // Special handling for memory events if (opts::ParseMemProfile && !prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback)) if (const std::error_code EC = parseMemEvents()) errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() << '\n'; deleteTempFiles(); heatmap: // Sort parsed traces for faster processing. 
// Continuation of DataAggregator::preprocessProfile(): statements following the
// `heatmap:` label.
  if (!opts::BasicAggregation)
    llvm::sort(Traces, llvm::less_first());

  // No heatmap requested: preprocessing is complete.
  if (!opts::HeatmapMode)
    return Error::success();

  if (std::error_code EC = printLBRHeatMap())
    return errorCodeToError(EC);

  if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Optional)
    return Error::success();

  // In exclusive mode the process ends once the heatmap has been printed.
  assert(opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive);
  exit(0);
}

/// Aggregate the parsed events, convert branch data for every function in
/// \p BC and, with opts::AggregateOnly, write the requested output files.
/// Always returns success; write failures go through report_error().
Error DataAggregator::readProfile(BinaryContext &BC) {
  processProfile(BC);

  for (auto &BFI : BC.getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;
    convertBranchData(Function);
  }

  if (opts::AggregateOnly) {
    if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
      if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
        report_error("cannot create output data file", EC);

    // BAT YAML is handled by DataAggregator since normal YAML output requires
    // CFG which is not available in BAT mode.
    if (usesBAT()) {
      if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
        if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
          report_error("cannot create output data file", EC);
      if (!opts::SaveProfile.empty())
        if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile))
          report_error("cannot create output data file", EC);
    }
  }

  return Error::success();
}

/// Forwards to BinaryFunction::hasProfileAvailable() for \p Function.
bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
  return Function.hasProfileAvailable();
}

/// Turn the parsed samples into per-function profile data, mark profiled
/// functions, sort the aggregated records and free the intermediate storage.
void DataAggregator::processProfile(BinaryContext &BC) {
  if (opts::BasicAggregation)
    processBasicEvents();
  else
    processBranchEvents();

  processMemEvents();

  // Mark all functions with registered events as having a valid profile.
// Continuation of DataAggregator::processProfile().
  for (auto &BFI : BC.getBinaryFunctions()) {
    BinaryFunction &BF = BFI.second;
    // Branch data takes precedence; basic sample data is only consulted when a
    // function has no branch data.
    if (FuncBranchData *FBD = getBranchData(BF)) {
      BF.markProfiled(BinaryFunction::PF_BRANCH);
      BF.RawSampleCount = FBD->getNumExecutedBranches();
    } else if (FuncBasicSampleData *FSD =
                   getFuncBasicSampleData(BF.getNames())) {
      BF.markProfiled(BinaryFunction::PF_BASIC);
      BF.RawSampleCount = FSD->getSamples();
    }
  }

  for (auto &FuncBranches : NamesToBranches) {
    llvm::stable_sort(FuncBranches.second.Data);
    llvm::stable_sort(FuncBranches.second.EntryData);
  }

  for (auto &MemEvents : NamesToMemEvents)
    llvm::stable_sort(MemEvents.second.Data);

  // Release intermediate storage.
  clear(Traces);
  clear(BasicSamples);
  clear(MemSamples);
}

/// Return the function containing \p Address, or nullptr when the address is
/// outside the binary or no function covers it.
BinaryFunction *
DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
  if (!BC->containsAddress(Address))
    return nullptr;

  return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
                                                /*UseMaxSize=*/true);
}

/// Return the function at the parent address that BAT reports for \p Func.
/// Returns nullptr when BAT is not set or the reported address is 0.
BinaryFunction *
DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
  if (BAT)
    if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress()))
      return getBinaryFunctionContainingAddress(HotAddr);
  return nullptr;
}

/// Choose the name under which \p Func is recorded in the profile.
/// Without \p BAT this is Func.getOneName(). With \p BAT, the first name that
/// contains at least two '/' separators is preferred, falling back to
/// getOneName() when there is none.
StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
                                          bool BAT) {
  if (!BAT)
    return Func.getOneName();

  const BinaryFunction *OrigFunc = &Func;
  // If it is a local function, prefer the name containing the file name where
  // the local function was declared
  for (StringRef AlternativeName : OrigFunc->getNames()) {
    size_t FileNameIdx = AlternativeName.find('/');
    // Confirm the alternative name has the pattern Symbol/FileName/1 before
    // using it
    if (FileNameIdx == StringRef::npos ||
        AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
      continue;
    return AlternativeName;
  }
  return OrigFunc->getOneName();
}

/// Record \p Count basic samples at absolute \p Address inside \p OrigFunc.
/// The body follows on the next source line.
bool DataAggregator::doBasicSample(BinaryFunction &OrigFunc, uint64_t Address,
                                   uint64_t Count) {
  // To record executed bytes, use basic block size as is regardless of BAT.
// Continuation of DataAggregator::doBasicSample().
  uint64_t BlockSize = 0;
  if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
          Address - OrigFunc.getAddress()))
    BlockSize = BB->getOriginalSize();

  BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
  BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
  // Attach executed bytes to parent function in case of cold fragment.
  Func.SampleCountInBytes += Count * BlockSize;

  // Find the per-function sample record, creating it on first use.
  auto I = NamesToBasicSamples.find(Func.getOneName());
  if (I == NamesToBasicSamples.end()) {
    bool Success;
    StringRef LocName = getLocationName(Func, BAT);
    std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair(
        Func.getOneName(),
        FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy())));
  }

  // From here on Address is an offset relative to Func.
  Address -= Func.getAddress();
  if (BAT)
    Address = BAT->translate(Func.getAddress(), Address, /*IsBranchSrc=*/false);

  I->second.bumpCount(Address, Count);
  return true;
}

/// Add \p Count executions and \p Mispreds mispredictions to the branch
/// \p From -> \p To inside \p Func, creating the function's branch record on
/// first use. Always returns true.
bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
                                   uint64_t To, uint64_t Count,
                                   uint64_t Mispreds) {
  FuncBranchData *AggrData = getBranchData(Func);
  if (!AggrData) {
    AggrData = &NamesToBranches[Func.getOneName()];
    AggrData->Name = getLocationName(Func, BAT);
    setBranchData(Func, AggrData);
  }

  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
                    << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
  AggrData->bumpBranchCount(From, To, Count, Mispreds);
  return true;
}

/// Record a control transfer whose source and destination are handled as
/// belonging to different functions. Either \p FromFunc or \p ToFunc may be
/// null; only the sides that are present are updated. The function is
/// completed on the next source line.
bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
                                   BinaryFunction *ToFunc, uint64_t From,
                                   uint64_t To, uint64_t Count,
                                   uint64_t Mispreds) {
  FuncBranchData *FromAggrData = nullptr;
  FuncBranchData *ToAggrData = nullptr;
  StringRef SrcFunc;
  StringRef DstFunc;
  if (FromFunc) {
    SrcFunc = getLocationName(*FromFunc, BAT);
    FromAggrData = getBranchData(*FromFunc);
    if (!FromAggrData) {
      FromAggrData = &NamesToBranches[FromFunc->getOneName()];
      FromAggrData->Name = SrcFunc;
      setBranchData(*FromFunc, FromAggrData);
    }

    recordExit(*FromFunc, From, Mispreds, Count);
  }
  if (ToFunc) {
    DstFunc = getLocationName(*ToFunc, BAT);
    ToAggrData
// Continuation of DataAggregator::doInterBranch(): right-hand side of the
// assignment to ToAggrData that starts on the previous source line.
        = getBranchData(*ToFunc);
    if (!ToAggrData) {
      ToAggrData = &NamesToBranches[ToFunc->getOneName()];
      ToAggrData->Name = DstFunc;
      setBranchData(*ToFunc, ToAggrData);
    }

    recordEntry(*ToFunc, To, Mispreds, Count);
  }

  // The first Location argument is false when the other side has no name,
  // which is the case when that function pointer was null.
  if (FromAggrData)
    FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
                                Count, Mispreds);
  if (ToAggrData)
    ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
                               Count, Mispreds);
  return true;
}

/// Return true if the instruction at \p Addr is a return.
/// Addresses found to be returns are remembered in Returns, so later queries
/// for them skip the instruction lookup; negative results are not cached.
bool DataAggregator::checkReturn(uint64_t Addr) {
  auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
  if (llvm::is_contained(Returns, Addr))
    return true;

  BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
  if (!Func)
    return false;

  const uint64_t Offset = Addr - Func->getAddress();
  // Use the stored instruction when the function has instructions, otherwise
  // disassemble at the offset.
  if (Func->hasInstructions()
          ? isReturn(Func->getInstructionAtOffset(Offset))
          : isReturn(Func->disassembleInstructionAtOffset(Offset))) {
    Returns.emplace(Addr);
    return true;
  }
  return false;
}

/// Record a taken branch \p From -> \p To (absolute addresses) with the given
/// \p Count and \p Mispreds. Returns false when neither address maps to a
/// function. The final dispatch follows on the next source line.
bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
                              uint64_t Mispreds) {
  // Mutates \p Addr to an offset into the containing function, performing BAT
  // offset translation and parent lookup.
  //
  // Returns the containing function (or BAT parent).
  auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
    BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
    if (!Func) {
      Addr = 0;
      return Func;
    }

    Addr -= Func->getAddress();

    if (BAT)
      Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);

    if (BinaryFunction *ParentFunc = getBATParentFunction(*Func))
      return ParentFunc;

    return Func;
  };

  BinaryFunction *FromFunc = handleAddress(From, /*IsFrom*/ true);
  BinaryFunction *ToFunc = handleAddress(To, /*IsFrom*/ false);
  if (!FromFunc && !ToFunc)
    return false;

  // Treat recursive control transfers as inter-branches.
if (FromFunc == ToFunc && To != 0) { recordBranch(*FromFunc, From, To, Count, Mispreds); return doIntraBranch(*FromFunc, From, To, Count, Mispreds); } return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); } bool DataAggregator::doTrace(const Trace &Trace, uint64_t Count, bool IsReturn) { const uint64_t From = Trace.From, To = Trace.To; BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); NumTraces += Count; if (!FromFunc || !ToFunc) { LLVM_DEBUG(dbgs() << "Out of range trace " << Trace << '\n'); NumLongRangeTraces += Count; return false; } if (FromFunc != ToFunc) { LLVM_DEBUG(dbgs() << "Invalid trace " << Trace << '\n'); NumInvalidTraces += Count; return false; } // Set ParentFunc to BAT parent function or FromFunc itself. BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc); if (!ParentFunc) ParentFunc = FromFunc; ParentFunc->SampleCountInBytes += Count * (To - From); const uint64_t FuncAddress = FromFunc->getAddress(); std::optional FTs = BAT && BAT->isBATFunction(FuncAddress) ? BAT->getFallthroughsInTrace(FuncAddress, From - IsReturn, To) : getFallthroughsInTrace(*FromFunc, Trace, Count, IsReturn); if (!FTs) { LLVM_DEBUG(dbgs() << "Invalid trace " << Trace << '\n'); NumInvalidTraces += Count; return false; } LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " << FromFunc->getPrintName() << ":" << Trace << '\n'); for (auto [From, To] : *FTs) { if (BAT) { From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true); To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false); } doIntraBranch(*ParentFunc, From, To, Count, false); } return true; } std::optional, 16>> DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace, uint64_t Count, bool IsReturn) const { SmallVector, 16> Branches; BinaryContext &BC = BF.getBinaryContext(); // Offsets of the trace within this function. 
// Continuation of DataAggregator::getFallthroughsInTrace(): validate the trace
// and determine its first and last basic blocks.
  const uint64_t From = Trace.From - BF.getAddress();
  const uint64_t To = Trace.To - BF.getAddress();

  // A fall-through path cannot run backwards.
  if (From > To)
    return std::nullopt;

  // Accept fall-throughs inside pseudo functions (PLT/thunks).
  // This check has to be above BF.empty as pseudo functions would pass it:
  // pseudo => ignored => CFG not built => empty.
  // If we return nullopt, trace would be reported as mismatching disassembled
  // function contents which it is not. To avoid this, return an empty
  // fall-through list instead.
  if (BF.isPseudo())
    return Branches;

  if (!BF.isSimple())
    return std::nullopt;

  assert(BF.hasCFG() && "can only record traces in CFG state");

  const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
  const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB)
    return std::nullopt;

  // Adjust FromBB if the first LBR is a return from the last instruction in
  // the previous block (that instruction should be a call).
  if (IsReturn) {
    // NOTE(review): the lookup at From - 1 is not re-checked for null before
    // FromBB is used further down - confirm it cannot fail here.
    if (From)
      FromBB = BF.getBasicBlockContainingOffset(From - 1);
    else
      LLVM_DEBUG(dbgs() << "return to the function start: " << Trace << '\n');
  } else if (Trace.Branch == Trace::EXTERNAL && From == FromBB->getOffset() &&
             !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
    const BinaryBasicBlock *PrevBB =
        BF.getLayout().getBlock(FromBB->getIndex() - 1);
    // Start from the previous block only if it falls through into FromBB and
    // ends in a call; otherwise FromBB is left unchanged.
    if (PrevBB->getSuccessor(FromBB->getLabel())) {
      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
      if (Instr && BC.MIB->isCall(*Instr))
        FromBB = PrevBB;
      else
        LLVM_DEBUG(dbgs() << "invalid trace (no call): " << Trace << '\n');
    } else {
      LLVM_DEBUG(dbgs() << "invalid trace: " << Trace << '\n');
    }
  }

  // Fill out information for fall-through edges. The From and To could be
  // within the same basic block, e.g. when two call instructions are in the
  // same block. In this case we skip the processing.
  if (FromBB == ToBB)
    return Branches;

  // Process blocks in the original layout order.
// Continuation of DataAggregator::getFallthroughsInTrace(): walk the layout
// from FromBB to ToBB, collecting one (source offset, target offset) pair per
// fall-through edge.
  BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex());
  assert(BB == FromBB && "index mismatch");
  while (BB != ToBB) {
    BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1);
    assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");

    // Check for bad LBRs.
    if (!BB->getSuccessor(NextBB->getLabel())) {
      LLVM_DEBUG(dbgs() << "no fall-through for the trace: " << Trace << '\n');
      return std::nullopt;
    }

    // The edge source is the offset of the block's last non-pseudo
    // instruction, or the block offset if the block has none.
    const MCInst *Instr = BB->getLastNonPseudoInstr();
    uint64_t Offset = 0;
    if (Instr)
      Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
    else
      Offset = BB->getOffset();

    Branches.emplace_back(Offset, NextBB->getOffset());

    BB = NextBB;
  }

  // Record fall-through jumps
  for (const auto &[FromOffset, ToOffset] : Branches) {
    BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(FromOffset);
    BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(ToOffset);
    assert(FromBB && ToBB);
    BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
    BI.Count += Count;
  }

  return Branches;
}

/// Account for \p Count entries into \p BF at offset \p To.
/// Returns false, changing nothing, when \p To is past the function size.
/// Offset 0 adds to the function execution count and to its first block;
/// any other offset only adds to the block starting there, and only if that
/// block is an entry point. \p Mispred is not used.
bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
                                 uint64_t Count) const {
  if (To > BF.getSize())
    return false;

  // First profile data seen for this function: start counting from zero.
  if (!BF.hasProfile())
    BF.ExecutionCount = 0;

  BinaryBasicBlock *EntryBB = nullptr;
  if (To == 0) {
    BF.ExecutionCount += Count;
    if (!BF.empty())
      EntryBB = &BF.front();
  } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
    if (BB->isEntryPoint())
      EntryBB = BB;
  }

  if (EntryBB)
    EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);

  return true;
}

/// Account for an exit from \p BF at offset \p From.
/// Returns false for non-simple functions or offsets past the function size.
/// Otherwise the only effect is to zero the execution count of a function that
/// has no profile yet. \p Mispred and \p Count are not used.
bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
                                uint64_t Count) const {
  if (!BF.isSimple() || From > BF.getSize())
    return false;

  if (!BF.hasProfile())
    BF.ExecutionCount = 0;

  return true;
}

/// Parse one LBR entry of the form From/To/Flags/<rest> from ParsingBuf.
/// The body continues on the next source line.
/// NOTE(review): the template arguments of the ErrorOr types are missing in
/// this copy of the file.
ErrorOr DataAggregator::parseLBREntry() {
  LBREntry Res;
  ErrorOr FromStrRes = parseString('/');
  if (std::error_code EC = FromStrRes.getError())
    return EC;
  StringRef OffsetStr = FromStrRes.get();
  // Radix 0 lets getAsInteger pick the base from the prefix of the text.
  if (OffsetStr.getAsInteger(0, Res.From)) {
reportError("expected hexadecimal number with From address"); Diag << "Found: " << OffsetStr << "\n"; return make_error_code(llvm::errc::io_error); } ErrorOr ToStrRes = parseString('/'); if (std::error_code EC = ToStrRes.getError()) return EC; OffsetStr = ToStrRes.get(); if (OffsetStr.getAsInteger(0, Res.To)) { reportError("expected hexadecimal number with To address"); Diag << "Found: " << OffsetStr << "\n"; return make_error_code(llvm::errc::io_error); } ErrorOr MispredStrRes = parseString('/'); if (std::error_code EC = MispredStrRes.getError()) return EC; StringRef MispredStr = MispredStrRes.get(); // SPE brstack mispredicted flags might be up to two characters long: // 'PN' or 'MN'. Where 'N' optionally appears. bool ValidStrSize = opts::ArmSPE ? MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1; bool SpeTakenBitErr = (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N'); bool PredictionBitErr = !ValidStrSize || (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-'); if (SpeTakenBitErr) reportError("expected 'N' as SPE prediction bit for a not-taken branch"); if (PredictionBitErr) reportError("expected 'P', 'M' or '-' char as a prediction bit"); if (SpeTakenBitErr || PredictionBitErr) { Diag << "Found: " << MispredStr << "\n"; return make_error_code(llvm::errc::io_error); } Res.Mispred = MispredStr[0] == 'M'; static bool MispredWarning = true; if (MispredStr[0] == '-' && MispredWarning) { errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; MispredWarning = false; } ErrorOr Rest = parseString(FieldSeparator, true); if (std::error_code EC = Rest.getError()) return EC; if (Rest.get().size() < 5) { reportError("expected rest of LBR entry"); Diag << "Found: " << Rest.get() << "\n"; return make_error_code(llvm::errc::io_error); } return Res; } bool DataAggregator::checkAndConsumeFS() { if (ParsingBuf[0] != FieldSeparator) return false; ParsingBuf = ParsingBuf.drop_front(1); Col += 1; return 
true; } void DataAggregator::consumeRestOfLine() { size_t LineEnd = ParsingBuf.find_first_of('\n'); if (LineEnd == StringRef::npos) { ParsingBuf = StringRef(); Col = 0; Line += 1; return; } ParsingBuf = ParsingBuf.drop_front(LineEnd + 1); Col = 0; Line += 1; } bool DataAggregator::checkNewLine() { return ParsingBuf[0] == '\n'; } ErrorOr DataAggregator::parseBranchSample() { PerfBranchSample Res; while (checkAndConsumeFS()) { } ErrorOr PIDRes = parseNumberField(FieldSeparator, true); if (std::error_code EC = PIDRes.getError()) return EC; auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) { consumeRestOfLine(); return make_error_code(errc::no_such_process); } if (checkAndConsumeNewLine()) return Res; while (!checkAndConsumeNewLine()) { checkAndConsumeFS(); ErrorOr LBRRes = parseLBREntry(); if (std::error_code EC = LBRRes.getError()) return EC; LBREntry LBR = LBRRes.get(); if (ignoreKernelInterrupt(LBR)) continue; if (!BC->HasFixedLoadAddress) adjustLBR(LBR, MMapInfoIter->second); Res.LBR.push_back(LBR); } return Res; } ErrorOr DataAggregator::parseBasicSample() { while (checkAndConsumeFS()) { } ErrorOr PIDRes = parseNumberField(FieldSeparator, true); if (std::error_code EC = PIDRes.getError()) return EC; auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); if (MMapInfoIter == BinaryMMapInfo.end()) { consumeRestOfLine(); return PerfBasicSample{StringRef(), 0}; } while (checkAndConsumeFS()) { } ErrorOr Event = parseString(FieldSeparator); if (std::error_code EC = Event.getError()) return EC; while (checkAndConsumeFS()) { } ErrorOr AddrRes = parseHexField(FieldSeparator, true); if (std::error_code EC = AddrRes.getError()) return EC; if (!checkAndConsumeNewLine()) { reportError("expected end of line"); return make_error_code(llvm::errc::io_error); } uint64_t Address = *AddrRes; if (!BC->HasFixedLoadAddress) adjustAddress(Address, MMapInfoIter->second); return PerfBasicSample{Event.get(), Address}; } ErrorOr 
DataAggregator::parseMemSample() { PerfMemSample Res{0, 0}; while (checkAndConsumeFS()) { } ErrorOr PIDRes = parseNumberField(FieldSeparator, true); if (std::error_code EC = PIDRes.getError()) return EC; auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); if (MMapInfoIter == BinaryMMapInfo.end()) { consumeRestOfLine(); return Res; } while (checkAndConsumeFS()) { } ErrorOr Event = parseString(FieldSeparator); if (std::error_code EC = Event.getError()) return EC; if (!Event.get().contains("mem-loads")) { consumeRestOfLine(); return Res; } while (checkAndConsumeFS()) { } ErrorOr AddrRes = parseHexField(FieldSeparator); if (std::error_code EC = AddrRes.getError()) return EC; while (checkAndConsumeFS()) { } ErrorOr PCRes = parseHexField(FieldSeparator, true); if (std::error_code EC = PCRes.getError()) { consumeRestOfLine(); return EC; } if (!checkAndConsumeNewLine()) { reportError("expected end of line"); return make_error_code(llvm::errc::io_error); } uint64_t Address = *AddrRes; if (!BC->HasFixedLoadAddress) adjustAddress(Address, MMapInfoIter->second); return PerfMemSample{PCRes.get(), Address}; } ErrorOr DataAggregator::parseLocationOrOffset() { auto parseOffset = [this]() -> ErrorOr { ErrorOr Res = parseHexField(FieldSeparator); if (std::error_code EC = Res.getError()) return EC; return Location(Res.get()); }; size_t Sep = ParsingBuf.find_first_of(" \n"); if (Sep == StringRef::npos) return parseOffset(); StringRef LookAhead = ParsingBuf.substr(0, Sep); if (!LookAhead.contains(':')) return parseOffset(); ErrorOr BuildID = parseString(':'); if (std::error_code EC = BuildID.getError()) return EC; ErrorOr Offset = parseHexField(FieldSeparator); if (std::error_code EC = Offset.getError()) return EC; return Location(true, BuildID.get(), Offset.get()); } std::error_code DataAggregator::parseAggregatedLBREntry() { enum AggregatedLBREntry : char { INVALID = 0, EVENT_NAME, // E TRACE, // T SAMPLE, // S BRANCH, // B FT, // F FT_EXTERNAL_ORIGIN // f } Type = INVALID; /// The 
number of fields to parse, set based on \p Type. int AddrNum = 0; int CounterNum = 0; /// Storage for parsed fields. StringRef EventName; std::optional Addr[3]; int64_t Counters[2] = {0}; /// Parse strings: record type and optionally an event name. while (Type == INVALID || Type == EVENT_NAME) { while (checkAndConsumeFS()) { } ErrorOr StrOrErr = parseString(FieldSeparator, Type == EVENT_NAME); if (std::error_code EC = StrOrErr.getError()) return EC; StringRef Str = StrOrErr.get(); if (Type == EVENT_NAME) { EventName = Str; break; } Type = StringSwitch(Str) .Case("T", TRACE) .Case("S", SAMPLE) .Case("E", EVENT_NAME) .Case("B", BRANCH) .Case("F", FT) .Case("f", FT_EXTERNAL_ORIGIN) .Default(INVALID); if (Type == INVALID) { reportError("expected T, S, E, B, F or f"); return make_error_code(llvm::errc::io_error); } using SSI = StringSwitch; AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2); CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1); } /// Parse locations depending on entry type, recording them in \p Addr array. for (int I = 0; I < AddrNum; ++I) { while (checkAndConsumeFS()) { } ErrorOr AddrOrErr = parseLocationOrOffset(); if (std::error_code EC = AddrOrErr.getError()) return EC; Addr[I] = AddrOrErr.get(); } /// Parse counters depending on entry type. for (int I = 0; I < CounterNum; ++I) { while (checkAndConsumeFS()) { } ErrorOr CountOrErr = parseNumberField(FieldSeparator, I + 1 == CounterNum); if (std::error_code EC = CountOrErr.getError()) return EC; Counters[I] = CountOrErr.get(); } /// Expect end of line here. if (!checkAndConsumeNewLine()) { reportError("expected end of line"); return make_error_code(llvm::errc::io_error); } /// Record event name into \p EventNames and return. 
if (Type == EVENT_NAME) { EventNames.insert(EventName); return std::error_code(); } const uint64_t FromOffset = Addr[0]->Offset; BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset); if (FromFunc) FromFunc->setHasProfileAvailable(); int64_t Count = Counters[0]; int64_t Mispreds = Counters[1]; /// Record basic IP sample into \p BasicSamples and return. if (Type == SAMPLE) { BasicSamples[FromOffset] += Count; NumTotalSamples += Count; return std::error_code(); } const uint64_t ToOffset = Addr[1]->Offset; BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset); if (ToFunc) ToFunc->setHasProfileAvailable(); /// For legacy fall-through types, adjust locations to match Trace container. if (Type == FT || Type == FT_EXTERNAL_ORIGIN) { Addr[2] = Location(Addr[1]->Offset); // Trace To Addr[1] = Location(Addr[0]->Offset); // Trace From // Put a magic value into Trace Branch to differentiate from a full trace. Addr[0] = Location(Type == FT ? Trace::FT_ONLY : Trace::FT_EXTERNAL_ORIGIN); } /// For legacy branch type, mark Trace To to differentite from a full trace. if (Type == BRANCH) { Addr[2] = Location(Trace::BR_ONLY); } /// Record a trace. 
Trace T{Addr[0]->Offset, Addr[1]->Offset, Addr[2]->Offset}; TakenBranchInfo TI{(uint64_t)Count, (uint64_t)Mispreds}; Traces.emplace_back(T, TI); NumTotalSamples += Count; return std::error_code(); } bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { return opts::IgnoreInterruptLBR && (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); } std::error_code DataAggregator::printLBRHeatMap() { outs() << "PERF2BOLT: parse branch events...\n"; NamedRegionTimer T("buildHeatmap", "Building heatmap", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); if (BC->IsLinuxKernel) { opts::HeatmapMaxAddress = 0xffffffffffffffff; opts::HeatmapMinAddress = KernelBaseAddr; } opts::HeatmapBlockSizes &HMBS = opts::HeatmapBlock; Heatmap HM(HMBS[0], opts::HeatmapMinAddress, opts::HeatmapMaxAddress, getTextSections(BC)); auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t { if (Symbol) if (ErrorOr SymValue = BC->getSymbolValue(*Symbol)) return SymValue.get(); return 0; }; HM.HotStart = getSymbolValue(BC->getHotTextStartSymbol()); HM.HotEnd = getSymbolValue(BC->getHotTextEndSymbol()); if (!NumTotalSamples) { if (opts::BasicAggregation) { errs() << "HEATMAP-ERROR: no basic event samples detected in profile. " "Cannot build heatmap."; } else { errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " "Cannot build heatmap. Use -nl for building heatmap from " "basic events.\n"; } exit(1); } outs() << "HEATMAP: building heat map...\n"; // Register basic samples and perf LBR addresses not covered by fallthroughs. 
for (const auto &[PC, Hits] : BasicSamples) HM.registerAddress(PC, Hits); for (const auto &[Trace, Info] : Traces) if (Trace.To != Trace::BR_ONLY) HM.registerAddressRange(Trace.From, Trace.To, Info.TakenCount); if (HM.getNumInvalidRanges()) outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; if (!HM.size()) { errs() << "HEATMAP-ERROR: no valid traces registered\n"; exit(1); } HM.print(opts::HeatmapOutput); if (opts::HeatmapOutput == "-") { HM.printCDF(opts::HeatmapOutput); HM.printSectionHotness(opts::HeatmapOutput); } else { HM.printCDF(opts::HeatmapOutput + ".csv"); HM.printSectionHotness(opts::HeatmapOutput + "-section-hotness.csv"); } // Provide coarse-grained heatmaps if requested via zoom-out scales for (const uint64_t NewBucketSize : ArrayRef(HMBS).drop_front()) { HM.resizeBucket(NewBucketSize); if (opts::HeatmapOutput == "-") HM.print(opts::HeatmapOutput); else HM.print(formatv("{0}-{1}", opts::HeatmapOutput, NewBucketSize).str()); } return std::error_code(); } void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix) { // LBRs are stored in reverse execution order. NextLBR refers to the next // executed branch record. const LBREntry *NextLBR = nullptr; uint32_t NumEntry = 0; for (const LBREntry &LBR : Sample.LBR) { ++NumEntry; // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) // sometimes record entry 32 as an exact copy of entry 31. This will cause // us to likely record an invalid trace and generate a stale function for // BAT mode (non BAT disassembles the function and is able to ignore this // trace at aggregation time). Drop first 2 entries (last two, in // chronological order) if (NeedsSkylakeFix && NumEntry <= 2) continue; uint64_t TraceTo = NextLBR ? 
NextLBR->From : Trace::BR_ONLY; NextLBR = &LBR; TakenBranchInfo &Info = TraceMap[Trace{LBR.From, LBR.To, TraceTo}]; ++Info.TakenCount; Info.MispredCount += LBR.Mispred; } // Record LBR addresses not covered by fallthroughs (bottom-of-stack source // and top-of-stack target) as basic samples for heatmap. if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive && !Sample.LBR.empty()) { ++BasicSamples[Sample.LBR.front().To]; ++BasicSamples[Sample.LBR.back().From]; } } void DataAggregator::printLongRangeTracesDiagnostic() const { outs() << "PERF2BOLT: out of range traces involving unknown regions: " << NumLongRangeTraces; if (NumTraces > 0) outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); outs() << "\n"; } static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1, float T2) { if (Denominator == 0) { outs() << "\n"; return 0; } float Percent = Numerator * 100.0f / Denominator; outs() << " ("; if (outs().has_colors()) { if (Percent > T2) outs().changeColor(raw_ostream::RED); else if (Percent > T1) outs().changeColor(raw_ostream::YELLOW); else outs().changeColor(raw_ostream::GREEN); } outs() << format("%.1f%%", Percent); if (outs().has_colors()) outs().resetColor(); outs() << ")\n"; return Percent; } void DataAggregator::printBranchSamplesDiagnostics() const { outs() << "PERF2BOLT: traces mismatching disassembled function contents: " << NumInvalidTraces; if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10) outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " "binary is probably not the same binary used during profiling " "collection. 
The generated data may be ineffective for improving " "performance\n\n"; printLongRangeTracesDiagnostic(); } void DataAggregator::printBasicSamplesDiagnostics( uint64_t OutOfRangeSamples) const { outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " << OutOfRangeSamples; if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80) outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " "binary is probably not the same binary used during profiling " "collection. The generated data may be ineffective for improving " "performance\n\n"; } void DataAggregator::printBranchStacksDiagnostics( uint64_t IgnoredSamples) const { outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples; if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50) errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " "were attributed to the input binary\n"; } std::error_code DataAggregator::parseBranchEvents() { std::string BranchEventTypeStr = opts::ArmSPE ? 
"SPE branch events in LBR-format" : "branch events"; outs() << "PERF2BOLT: parse " << BranchEventTypeStr << "...\n"; NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); uint64_t NumEntries = 0; uint64_t NumSamples = 0; uint64_t NumSamplesNoLBR = 0; bool NeedsSkylakeFix = false; while (hasData() && NumTotalSamples < opts::MaxSamples) { ++NumTotalSamples; ErrorOr SampleRes = parseBranchSample(); if (std::error_code EC = SampleRes.getError()) { if (EC == errc::no_such_process) continue; return EC; } ++NumSamples; PerfBranchSample &Sample = SampleRes.get(); if (Sample.LBR.empty()) { ++NumSamplesNoLBR; continue; } NumEntries += Sample.LBR.size(); if (this->BC->isX86() && BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; NeedsSkylakeFix = true; } parseLBRSample(Sample, NeedsSkylakeFix); } Traces.reserve(TraceMap.size()); for (const auto &[Trace, Info] : TraceMap) { Traces.emplace_back(Trace, Info); for (const uint64_t Addr : {Trace.Branch, Trace.From}) if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr)) BF->setHasProfileAvailable(); } clear(TraceMap); outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries << " LBR entries\n"; if (NumTotalSamples) { if (NumSamples && NumSamplesNoLBR == NumSamples) { // Note: we don't know if perf2bolt is being used to parse memory samples // at this point. In this case, it is OK to parse zero LBRs. if (!opts::ArmSPE) errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " "LBR. Record profile with perf record -j any or run perf2bolt " "in no-LBR mode with -nl (the performance improvement in -nl " "mode may be limited)\n"; else errs() << "PERF2BOLT-WARNING: All recorded samples for this binary lack " "SPE brstack entries. Make sure you are running Linux perf 6.14 " "or later, otherwise you get zero samples. 
Record the profile " "with: perf record -e 'arm_spe_0/branch_filter=1/'."; } else { printBranchStacksDiagnostics(NumTotalSamples - NumSamples); } } return std::error_code(); } void DataAggregator::processBranchEvents() { outs() << "PERF2BOLT: processing branch events...\n"; NamedRegionTimer T("processBranch", "Processing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); for (const auto &[Trace, Info] : Traces) { bool IsReturn = checkReturn(Trace.Branch); // Ignore returns. if (!IsReturn && Trace.Branch != Trace::FT_ONLY && Trace.Branch != Trace::FT_EXTERNAL_ORIGIN) doBranch(Trace.Branch, Trace.From, Info.TakenCount, Info.MispredCount); if (Trace.To != Trace::BR_ONLY) doTrace(Trace, Info.TakenCount, IsReturn); } printBranchSamplesDiagnostics(); } std::error_code DataAggregator::parseBasicEvents() { outs() << "PERF2BOLT: parsing basic events (without LBR)...\n"; NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); while (hasData()) { ErrorOr Sample = parseBasicSample(); if (std::error_code EC = Sample.getError()) return EC; if (!Sample->PC) continue; ++NumTotalSamples; if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) BF->setHasProfileAvailable(); ++BasicSamples[Sample->PC]; EventNames.insert(Sample->EventName); } outs() << "PERF2BOLT: read " << NumTotalSamples << " basic samples\n"; return std::error_code(); } void DataAggregator::processBasicEvents() { outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); uint64_t OutOfRangeSamples = 0; for (auto &Sample : BasicSamples) { const uint64_t PC = Sample.first; const uint64_t HitCount = Sample.second; BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); if (!Func) { OutOfRangeSamples += HitCount; continue; } doBasicSample(*Func, PC, HitCount); } 
printBasicSamplesDiagnostics(OutOfRangeSamples); } std::error_code DataAggregator::parseMemEvents() { outs() << "PERF2BOLT: parsing memory events...\n"; NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); while (hasData()) { ErrorOr Sample = parseMemSample(); if (std::error_code EC = Sample.getError()) return EC; if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) BF->setHasProfileAvailable(); MemSamples.emplace_back(std::move(Sample.get())); } return std::error_code(); } void DataAggregator::processMemEvents() { NamedRegionTimer T("ProcessMemEvents", "Processing mem events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); for (const PerfMemSample &Sample : MemSamples) { uint64_t PC = Sample.PC; uint64_t Addr = Sample.Addr; StringRef FuncName; StringRef MemName; // Try to resolve symbol for PC BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); if (!Func) { LLVM_DEBUG(if (PC != 0) { dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr); }); continue; } FuncName = Func->getOneName(); PC -= Func->getAddress(); // Try to resolve symbol for memory load if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) { MemName = BD->getName(); Addr -= BD->getAddress(); } else if (opts::FilterMemProfile) { // Filter out heap/stack accesses continue; } const Location FuncLoc(!FuncName.empty(), FuncName, PC); const Location AddrLoc(!MemName.empty(), MemName, Addr); FuncMemData *MemData = &NamesToMemEvents[FuncName]; MemData->Name = FuncName; setMemData(*Func, MemData); MemData->update(FuncLoc, AddrLoc); LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n"); } } std::error_code DataAggregator::parsePreAggregatedLBRSamples() { outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); size_t AggregatedLBRs = 0; while 
(hasData()) { if (std::error_code EC = parseAggregatedLBREntry()) return EC; ++AggregatedLBRs; } outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n"; return std::error_code(); } std::optional DataAggregator::parseCommExecEvent() { size_t LineEnd = ParsingBuf.find_first_of("\n"); if (LineEnd == StringRef::npos) { reportError("expected rest of line"); Diag << "Found: " << ParsingBuf << "\n"; return std::nullopt; } StringRef Line = ParsingBuf.substr(0, LineEnd); size_t Pos = Line.find("PERF_RECORD_COMM exec"); if (Pos == StringRef::npos) return std::nullopt; Line = Line.drop_front(Pos); // Line: // PERF_RECORD_COMM exec: :/" StringRef PIDStr = Line.rsplit(':').second.split('/').first; int32_t PID; if (PIDStr.getAsInteger(10, PID)) { reportError("expected PID"); Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; return std::nullopt; } return PID; } namespace { std::optional parsePerfTime(const StringRef TimeStr) { const StringRef SecTimeStr = TimeStr.split('.').first; const StringRef USecTimeStr = TimeStr.split('.').second; uint64_t SecTime; uint64_t USecTime; if (SecTimeStr.getAsInteger(10, SecTime) || USecTimeStr.getAsInteger(10, USecTime)) return std::nullopt; return SecTime * 1000000ULL + USecTime; } } std::optional DataAggregator::parseForkEvent() { while (checkAndConsumeFS()) { } size_t LineEnd = ParsingBuf.find_first_of("\n"); if (LineEnd == StringRef::npos) { reportError("expected rest of line"); Diag << "Found: " << ParsingBuf << "\n"; return std::nullopt; } StringRef Line = ParsingBuf.substr(0, LineEnd); size_t Pos = Line.find("PERF_RECORD_FORK"); if (Pos == StringRef::npos) { consumeRestOfLine(); return std::nullopt; } ForkInfo FI; const StringRef TimeStr = Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; if (std::optional TimeRes = parsePerfTime(TimeStr)) { FI.Time = *TimeRes; } Line = Line.drop_front(Pos); // Line: // PERF_RECORD_FORK(:):(:) const StringRef ChildPIDStr = 
Line.split('(').second.split(':').first; if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { reportError("expected PID"); Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; return std::nullopt; } const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { reportError("expected PID"); Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; return std::nullopt; } consumeRestOfLine(); return FI; } ErrorOr> DataAggregator::parseMMapEvent() { while (checkAndConsumeFS()) { } MMapInfo ParsedInfo; size_t LineEnd = ParsingBuf.find_first_of("\n"); if (LineEnd == StringRef::npos) { reportError("expected rest of line"); Diag << "Found: " << ParsingBuf << "\n"; return make_error_code(llvm::errc::io_error); } StringRef Line = ParsingBuf.substr(0, LineEnd); size_t Pos = Line.find("PERF_RECORD_MMAP2"); if (Pos == StringRef::npos) { consumeRestOfLine(); return std::make_pair(StringRef(), ParsedInfo); } // Line: // { .* .: }PERF_RECORD_MMAP2 /: .* const StringRef TimeStr = Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; if (std::optional TimeRes = parsePerfTime(TimeStr)) ParsedInfo.Time = *TimeRes; Line = Line.drop_front(Pos); // Line: // PERF_RECORD_MMAP2 /: [() .*]: .* StringRef FileName = Line.rsplit(FieldSeparator).second; if (FileName.starts_with("//") || FileName.starts_with("[")) { consumeRestOfLine(); return std::make_pair(StringRef(), ParsedInfo); } FileName = sys::path::filename(FileName); const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first; if (PIDStr.getAsInteger(10, ParsedInfo.PID)) { reportError("expected PID"); Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; return make_error_code(llvm::errc::io_error); } const StringRef BaseAddressStr = Line.split('[').second.split('(').first; if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) { reportError("expected base address"); Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; 
return make_error_code(llvm::errc::io_error); } const StringRef SizeStr = Line.split('(').second.split(')').first; if (SizeStr.getAsInteger(0, ParsedInfo.Size)) { reportError("expected mmaped size"); Diag << "Found: " << SizeStr << "in '" << Line << "'\n"; return make_error_code(llvm::errc::io_error); } const StringRef OffsetStr = Line.split('@').second.ltrim().split(FieldSeparator).first; if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) { reportError("expected mmaped page-aligned offset"); Diag << "Found: " << OffsetStr << "in '" << Line << "'\n"; return make_error_code(llvm::errc::io_error); } consumeRestOfLine(); return std::make_pair(FileName, ParsedInfo); } std::error_code DataAggregator::parseMMapEvents() { outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); std::multimap GlobalMMapInfo; while (hasData()) { ErrorOr> FileMMapInfoRes = parseMMapEvent(); if (std::error_code EC = FileMMapInfoRes.getError()) return EC; std::pair FileMMapInfo = FileMMapInfoRes.get(); if (FileMMapInfo.second.PID == -1) continue; if (FileMMapInfo.first == "(deleted)") continue; GlobalMMapInfo.insert(FileMMapInfo); } LLVM_DEBUG({ dbgs() << "FileName -> mmap info:\n" << " Filename : PID [MMapAddr, Size, Offset]\n"; for (const auto &[Name, MMap] : GlobalMMapInfo) dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID, MMap.MMapAddress, MMap.Size, MMap.Offset); }); StringRef NameToUse = llvm::sys::path::filename(BC->getFilename()); if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) { errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName << "\" for profile matching\n"; NameToUse = BuildIDBinaryName; } auto Range = GlobalMMapInfo.equal_range(NameToUse); for (MMapInfo &MMapInfo : llvm::make_second_range(make_range(Range))) { if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { // Check that the binary mapping matches 
one of the segments. bool MatchFound = llvm::any_of( llvm::make_second_range(BC->SegmentMapInfo), [&](SegmentInfo &SegInfo) { // The mapping is page-aligned and hence the MMapAddress could be // different from the segment start address. We cannot know the page // size of the mapping, but we know it should not exceed the segment // alignment value. Hence we are performing an approximate check. return SegInfo.Address >= MMapInfo.MMapAddress && SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment && SegInfo.IsExecutable; }); if (!MatchFound) { errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n'; continue; } } // Set base address for shared objects. if (!BC->HasFixedLoadAddress) { std::optional BaseAddress = BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset); if (!BaseAddress) { errs() << "PERF2BOLT-WARNING: unable to find base address of the " "binary when memory mapped at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) << ". Ignoring profile data for this mapping\n"; continue; } MMapInfo.BaseAddress = *BaseAddress; } // Try to add MMapInfo to the map and update its size. Large binaries may // span to multiple text segments, so the mapping is inserted only on the // first occurrence. if (!BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)).second) assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress && "Base address on multiple segment mappings should match"); // Update mapping size. 
const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size; const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress; if (Size > BinaryMMapInfo[MMapInfo.PID].Size) BinaryMMapInfo[MMapInfo.PID].Size = Size; } if (BinaryMMapInfo.empty()) { if (errs().has_colors()) errs().changeColor(raw_ostream::RED); errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" << BC->getFilename() << "\"."; if (!GlobalMMapInfo.empty()) { errs() << " Profile for the following binary name(s) is available:\n"; for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; I = GlobalMMapInfo.upper_bound(I->first)) errs() << " " << I->first << '\n'; errs() << "Please rename the input binary.\n"; } else { errs() << " Failed to extract any binary name from a profile.\n"; } if (errs().has_colors()) errs().resetColor(); exit(1); } return std::error_code(); } std::error_code DataAggregator::parseTaskEvents() { outs() << "PERF2BOLT: parsing perf-script task events output\n"; NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); while (hasData()) { if (std::optional CommInfo = parseCommExecEvent()) { // Remove forked child that ran execve auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo); if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) BinaryMMapInfo.erase(MMapInfoIter); consumeRestOfLine(); continue; } std::optional ForkInfo = parseForkEvent(); if (!ForkInfo) continue; if (ForkInfo->ParentPID == ForkInfo->ChildPID) continue; if (ForkInfo->Time == 0) { // Process was forked and mmaped before perf ran. In this case the child // should have its own mmap entry unless it was execve'd. 
continue; } auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID); if (MMapInfoIter == BinaryMMapInfo.end()) continue; MMapInfo MMapInfo = MMapInfoIter->second; MMapInfo.PID = ForkInfo->ChildPID; MMapInfo.Forked = true; BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); } outs() << "PERF2BOLT: input binary is associated with " << BinaryMMapInfo.size() << " PID(s)\n"; LLVM_DEBUG({ for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo)) outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID, (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress, MMI.Size); }); return std::error_code(); } std::optional> DataAggregator::parseNameBuildIDPair() { while (checkAndConsumeFS()) { } ErrorOr BuildIDStr = parseString(FieldSeparator, true); if (std::error_code EC = BuildIDStr.getError()) return std::nullopt; // If one of the strings is missing, don't issue a parsing error, but still // do not return a value. consumeAllRemainingFS(); if (checkNewLine()) return std::nullopt; ErrorOr NameStr = parseString(FieldSeparator, true); if (std::error_code EC = NameStr.getError()) return std::nullopt; consumeRestOfLine(); return std::make_pair(NameStr.get(), BuildIDStr.get()); } bool DataAggregator::hasAllBuildIDs() { const StringRef SavedParsingBuf = ParsingBuf; if (!hasData()) return false; bool HasInvalidEntries = false; while (hasData()) { if (!parseNameBuildIDPair()) { HasInvalidEntries = true; break; } } ParsingBuf = SavedParsingBuf; return !HasInvalidEntries; } std::optional DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { const StringRef SavedParsingBuf = ParsingBuf; StringRef FileName; while (hasData()) { std::optional> IDPair = parseNameBuildIDPair(); if (!IDPair) { consumeRestOfLine(); continue; } if (IDPair->second.starts_with(FileBuildID)) { FileName = sys::path::filename(IDPair->first); break; } } ParsingBuf = SavedParsingBuf; if (!FileName.empty()) return FileName; return std::nullopt; } std::error_code 
DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { std::error_code EC; raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); if (EC) return EC; bool WriteMemLocs = false; auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { if (WriteMemLocs) OutFile << (Loc.IsSymbol ? "4 " : "3 "); else OutFile << (Loc.IsSymbol ? "1 " : "0 "); OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; }; uint64_t BranchValues = 0; uint64_t MemValues = 0; if (BAT) OutFile << "boltedcollection\n"; if (opts::BasicAggregation) { OutFile << "no_lbr"; for (const StringMapEntry &Entry : EventNames) OutFile << " " << Entry.getKey(); OutFile << "\n"; for (const auto &KV : NamesToBasicSamples) { const FuncBasicSampleData &FSD = KV.second; for (const BasicSampleInfo &SI : FSD.Data) { writeLocation(SI.Loc); OutFile << SI.Hits << "\n"; ++BranchValues; } } } else { for (const auto &KV : NamesToBranches) { const FuncBranchData &FBD = KV.second; for (const BranchInfo &BI : FBD.Data) { writeLocation(BI.From); writeLocation(BI.To); OutFile << BI.Mispreds << " " << BI.Branches << "\n"; ++BranchValues; } for (const BranchInfo &BI : FBD.EntryData) { // Do not output if source is a known symbol, since this was already // accounted for in the source function if (BI.From.IsSymbol) continue; writeLocation(BI.From); writeLocation(BI.To); OutFile << BI.Mispreds << " " << BI.Branches << "\n"; ++BranchValues; } } WriteMemLocs = true; for (const auto &KV : NamesToMemEvents) { const FuncMemData &FMD = KV.second; for (const MemInfo &MemEvent : FMD.Data) { writeLocation(MemEvent.Offset); writeLocation(MemEvent.Addr); OutFile << MemEvent.Count << "\n"; ++MemValues; } } } outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues << " memory objects to " << OutputFilename << "\n"; return std::error_code(); } std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, 
StringRef OutputFilename) const { std::error_code EC; raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); if (EC) return EC; yaml::bolt::BinaryProfile BP; const MCPseudoProbeDecoder *PseudoProbeDecoder = opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; // Fill out the header info. BP.Header.Version = 1; BP.Header.FileName = std::string(BC.getFilename()); std::optional BuildID = BC.getFileBuildID(); BP.Header.Id = BuildID ? std::string(*BuildID) : ""; BP.Header.Origin = std::string(getReaderName()); // Only the input binary layout order is supported. BP.Header.IsDFSOrder = false; // FIXME: Need to match hash function used to produce BAT hashes. BP.Header.HashFunction = HashFunction::Default; ListSeparator LS(","); raw_string_ostream EventNamesOS(BP.Header.EventNames); for (const StringMapEntry &EventEntry : EventNames) EventNamesOS << LS << EventEntry.first().str(); BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_BASIC : BinaryFunction::PF_BRANCH; // Add probe inline tree nodes. 
YAMLProfileWriter::InlineTreeDesc InlineTree; if (PseudoProbeDecoder) std::tie(BP.PseudoProbeDesc, InlineTree) = YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder); if (!opts::BasicAggregation) { // Convert profile for functions not covered by BAT for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &Function = BFI.second; if (!Function.hasProfile()) continue; if (BAT->isBATFunction(Function.getAddress())) continue; BP.Functions.emplace_back(YAMLProfileWriter::convert( Function, /*UseDFS=*/false, InlineTree, BAT)); } for (const auto &KV : NamesToBranches) { const StringRef FuncName = KV.first; const FuncBranchData &Branches = KV.second; yaml::bolt::BinaryFunctionProfile YamlBF; BinaryData *BD = BC.getBinaryDataByName(FuncName); assert(BD); uint64_t FuncAddress = BD->getAddress(); if (!BAT->isBATFunction(FuncAddress)) continue; BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress); assert(BF); YamlBF.Name = getLocationName(*BF, BAT); YamlBF.Id = BF->getFunctionNumber(); YamlBF.Hash = BAT->getBFHash(FuncAddress); YamlBF.ExecCount = BF->getKnownExecutionCount(); YamlBF.ExternEntryCount = BF->getExternEntryCount(); YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress); const BoltAddressTranslation::BBHashMapTy &BlockMap = BAT->getBBHashMap(FuncAddress); YamlBF.Blocks.resize(YamlBF.NumBasicBlocks); for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) { const auto &Block = Entry.second; YamlBB.Hash = Block.Hash; YamlBB.Index = Block.Index; } // Lookup containing basic block offset and index auto getBlock = [&BlockMap](uint32_t Offset) { auto BlockIt = BlockMap.upper_bound(Offset); if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) { errs() << "BOLT-ERROR: invalid BAT section\n"; exit(1); } --BlockIt; return std::pair(BlockIt->first, BlockIt->second.Index); }; for (const BranchInfo &BI : Branches.Data) { using namespace yaml::bolt; const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset); BinaryBasicBlockProfile &YamlBB 
= YamlBF.Blocks[BlockIndex]; if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) { // Internal branch const unsigned SuccIndex = getBlock(BI.To.Offset).second; auto &SI = YamlBB.Successors.emplace_back(SuccessorInfo{SuccIndex}); SI.Count = BI.Branches; SI.Mispreds = BI.Mispreds; } else { // Call const uint32_t Offset = BI.From.Offset - BlockOffset; auto &CSI = YamlBB.CallSites.emplace_back(CallSiteInfo{Offset}); CSI.Count = BI.Branches; CSI.Mispreds = BI.Mispreds; if (const BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT, BI.To.Offset); } } // Set entry counts, similar to DataReader::readProfile. for (const BranchInfo &BI : Branches.EntryData) { if (!BlockMap.isInputBlock(BI.To.Offset)) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName << " at 0x" << Twine::utohexstr(BI.To.Offset) << '\n'; continue; } const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset); YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches; } if (PseudoProbeDecoder) { DenseMap InlineTreeNodeId; if (BF->getGUID()) { std::tie(YamlBF.InlineTree, InlineTreeNodeId) = YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF->getGUID()); } // Fetch probes belonging to all fragments const AddressProbesMap &ProbeMap = PseudoProbeDecoder->getAddress2ProbesMap(); BinaryFunction::FragmentsSetTy Fragments(BF->Fragments); Fragments.insert(BF); DenseMap< uint32_t, std::vector>> BlockProbes; for (const BinaryFunction *F : Fragments) { const uint64_t FuncAddr = F->getAddress(); for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) { const uint32_t OutputAddress = Probe.getAddress(); const uint32_t InputOffset = BAT->translate( FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); const unsigned BlockIndex = getBlock(InputOffset).second; BlockProbes[BlockIndex].emplace_back(Probe); } } for (auto &[Block, Probes] : 
BlockProbes) {
          YamlBF.Blocks[Block].PseudoProbes =
              YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
        }
      }
      // Skip printing if there's no profile data
      llvm::erase_if(
          YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
            auto HasCount = [](const auto &SI) { return SI.Count; };
            bool HasAnyCount = YamlBB.ExecCount ||
                               llvm::any_of(YamlBB.Successors, HasCount) ||
                               llvm::any_of(YamlBB.CallSites, HasCount);
            return !HasAnyCount;
          });
      BP.Functions.emplace_back(YamlBF);
    }
  }

  // Write the profile.
  yaml::Output Out(OutFile, nullptr, 0);
  Out << BP;
  return std::error_code();
}

/// Dump the aggregator state by forwarding to DataReader::dump().
void DataAggregator::dump() const { DataReader::dump(); }

/// Print a branch sample to Diag: first a header line with the number of LBR
/// entries held in \p Sample, then every entry of Sample.LBR on its own line,
/// in container order.
void DataAggregator::dump(const PerfBranchSample &Sample) const {
  Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
  for (const LBREntry &LBR : Sample.LBR)
    Diag << LBR << '\n';
}

/// Print a memory sample to Diag as a single line holding Sample.PC and
/// Sample.Addr, separated by ": ".
void DataAggregator::dump(const PerfMemSample &Sample) const {
  Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
}