diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 8bdb319748c6..6ff1a66e76c6 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -154,20 +154,33 @@ private: /// Perf utility full path name std::string PerfPath; + enum PerfProcessType { + BUILDIDS = 0, + MAIN_EVENTS, + MEM_EVENTS, + MMAP_EVENTS, + TASK_EVENTS + }; + friend raw_ostream &operator<<(raw_ostream &OS, const PerfProcessType &T); + /// Perf process spawning bookkeeping struct PerfProcessInfo { + static constexpr StringLiteral EventNamesStr[] = {"BUILDIDS", "MAIN", "MEM", + "MMAP", "TASK"}; + + enum PerfProcessType Type; bool IsFinished{false}; - sys::ProcessInfo PI; - SmallVector StdoutPath; - SmallVector StderrPath; + sys::ProcessInfo PI{}; + SmallVector StdoutPath{}; + SmallVector StderrPath{}; }; /// Process info for spawned processes - PerfProcessInfo BuildIDProcessInfo; - PerfProcessInfo MainEventsPPI; - PerfProcessInfo MemEventsPPI; - PerfProcessInfo MMapEventsPPI; - PerfProcessInfo TaskEventsPPI; + PerfProcessInfo BuildIDProcessInfo = {PerfProcessType::BUILDIDS}; + PerfProcessInfo MainEventsPPI = {PerfProcessType::MAIN_EVENTS}; + PerfProcessInfo MemEventsPPI = {PerfProcessType::MEM_EVENTS}; + PerfProcessInfo MMapEventsPPI = {PerfProcessType::MMAP_EVENTS}; + PerfProcessInfo TaskEventsPPI = {PerfProcessType::TASK_EVENTS}; /// Kernel VM starts at fixed based address /// https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt @@ -244,6 +257,9 @@ private: /// parsing. void launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, StringRef Args); + /// Return the size of \p File in bytes, or the error code on failure. + ErrorOr getFileSize(StringRef File); + /// Delete all temporary files created to hold the output generated by spawned /// subprocesses during the aggregation job void deleteTempFiles(); @@ -450,6 +466,51 @@ private: /// an external tool. 
std::error_code parsePreAggregatedLBRSamples(); + /// Dump pre-parsed perf profile data into a single file. + /// The generator relies on the aggregator to spawn the required + /// perf-script jobs based on the aggregation type, and merges + /// their results into a single file. + /// This hybrid profile contains all required events such as BuildID, + /// MMAP, TASK, MAIN (brstack or basic samples), or MEM for the aggregation. + /// The generator also creates a file header, where these events + /// are listed along with the length information of their contents. + /// The lengths given in the header are hexadecimal byte counts; they are + /// used as offsets into the pre-parsed profile. + /// Some of these events are required to be present in the file. + /// + /// Short description of supported events: + /// MEM: Optional. Parsing memory profile is enabled by default, unless + /// '--itrace' aggregation is set. In the latter case MEM profile + /// won't be added into the pre-parsed profile. Note that currently + /// mem events are only supported if they were gathered on X86_64. + /// MMAP: Compulsory; the mmap data is required to be in the file. + /// BUILDID: Ignored when buildid information doesn't exist in the input + /// profile. In that case, `--ignore-build-id` must be used. + /// TASK: If task-related data exists in the input profile, + /// Perf2bolt will always parse it. + /// MAIN: Compulsory; the MAIN events always have to be represented in the + /// file. Main events could be either 'brstack' or 'basic' sample data + /// based on how it was collected by Linux Perf. + /// + /// Example of how to generate a pre-parsed profile for 'basic' aggregation: + /// perf2bolt -p perf.data BINARY -o perf.text --ba --generate-perf-script + /// + /// This is what pre-parsed profile data looks like for Basic Aggregation: + /// PERFTEXT;BUILDIDS=32;MMAP=2dc6c0;MAIN=1388;TASK=55730;MEM=128; + /// abcd1234 /example/bin1 + /// ... + /// bin1 1234 ... 
PERF_RECORD_MMAP2 1234/1234: ... r-xp /example/bin1 + /// ... + /// bin1 1234 ... PERF_RECORD_COMM exec: bin1:1234/1234 + /// bin1 1234 ... PERF_RECORD_EXIT(1234:1234):(20469:20469) + /// ... + /// 1234 branch: abcd1234 abcd1237 + /// 1234 branch: abcd5678 abce9876 + /// ... + /// 1234 mem-loads: efgh1234 efgh1234 + /// 1234 mem-loads: efgh4567 efgh8910 + Error generatePerfTextData(); + /// If \p Address falls into the binary address space based on memory /// mapping info \p MMI, then adjust it for further processing by subtracting /// the base load address. External addresses, i.e. addresses that do not @@ -600,6 +661,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, OS << " ... " << Twine::utohexstr(T.To); return OS; } + +inline raw_ostream &operator<<(raw_ostream &OS, + const DataAggregator::PerfProcessType &T) { + OS << DataAggregator::PerfProcessInfo::EventNamesStr[T]; + return OS; +} } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 7c6abe435fa2..c770656dbadf 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -127,6 +127,11 @@ cl::opt "perf-script output in a textual format"), cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory)); +cl::opt GeneratePerfTextProfile( + "generate-perf-script", + cl::desc("Dump perf-script jobs' output into a file"), cl::Hidden, + cl::cat(AggregatorCategory)); + static cl::opt TimeAggregator("time-aggr", cl::desc("time BOLT aggregator"), @@ -141,6 +146,8 @@ namespace { const char TimerGroupName[] = "aggregator"; const char TimerGroupDesc[] = "Aggregator"; +constexpr const StringLiteral PerfTextMagicStr = "PERFTEXT"; + std::vector getTextSections(const BinaryContext *BC) { std::vector sections; for (BinarySection &Section : BC->sections()) { @@ -169,6 +176,15 @@ void deleteTempFile(const std::string &FileName) { } } +ErrorOr DataAggregator::getFileSize(StringRef File) { + uint64_t Size; + if 
(std::error_code EC = sys::fs::file_size(File, Size)) { + errs() << "unable to obtain file size: " << EC.message() << "\n"; + return EC; + } + return Size; +} + void DataAggregator::deleteTempFiles() { for (std::string &FileName : TempFiles) deleteTempFile(FileName); @@ -382,6 +398,65 @@ void DataAggregator::parsePreAggregated() { } } +Error DataAggregator::generatePerfTextData() { + std::error_code EC; + raw_fd_ostream OutFile(opts::OutputFilename, EC, sys::fs::OpenFlags::OF_None); + if (EC) { + errs() << "error opening output file: " << EC.message() << "\n"; + return errorCodeToError(EC); + } + + SmallVector ProcessInfos = { + &BuildIDProcessInfo, &MMapEventsPPI, &MainEventsPPI, &TaskEventsPPI}; + if (opts::ParseMemProfile) + ProcessInfos.push_back(&MemEventsPPI); + + // Create a file header as a Table of Contents. + // Initially pre-allocate sufficient space for the header at the beginning of + // the file. + // The header has a maximum length of 132 characters (pre-calculated value + // including the magic string, event names, their maximum sizes, + // and the field separators). + // PERFTEXT;EVENT1={$SIZE};EVENT2={$SIZE}... + // Event sizes are printed in hexadecimal format to ensure a predictable + // length. 
+ OutFile << std::string(132, ' ') << "\n"; + std::string Header; + raw_string_ostream SS(Header); + SS << PerfTextMagicStr << ";"; + for (const auto PPI : ProcessInfos) { + std::string Error; + auto PathData = PPI->StdoutPath.data(); + sys::Wait(PPI->PI, std::nullopt, &Error); + if (!Error.empty()) { + errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; + return errorCodeToError(make_error_code(llvm::errc::no_child_process)); + } + + ErrorOr FsRes = getFileSize(PathData); + if (std::error_code EC = FsRes.getError()) + return errorCodeToError(EC); + SS << PPI->Type << formatv("={0:x-};", *FsRes); + + // Append this perf-script job's output to the merged output file + ErrorOr> MB = + MemoryBuffer::getFileOrSTDIN(PathData); + if (std::error_code EC = MB.getError()) { + errs() << "Cannot open " << PathData << ": " << EC.message() << "\n"; + return errorCodeToError(EC); + } + OutFile << (*MB)->getBuffer(); + } + + OutFile.seek(0); + OutFile << Header; + OutFile.close(); + outs() << "PERF2BOLT: Profile is saved to file " << opts::OutputFilename + << "\n"; + deleteTempFiles(); + return Error::success(); +} + void DataAggregator::filterBinaryMMapInfo() { if (opts::FilterPID) { auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); @@ -594,7 +669,13 @@ void DataAggregator::imputeFallThroughs() { Error DataAggregator::preprocessProfile(BinaryContext &BC) { this->BC = &BC; - if (opts::ReadPreAggregated) { + if (opts::GeneratePerfTextProfile) { + if (Error E = generatePerfTextData()) { + deleteTempFiles(); + exit(1); + } + exit(0); + } else if (opts::ReadPreAggregated) { parsePreAggregated(); } else { parsePerfData(BC);