[BOLT][Perf2bolt] Add support to generate pre-parsed perf data (#171144)
Adding a generator to Perf2bolt is the initial step towards supporting large end-to-end tests for Arm SPE. This functionality provides a unified format for the pre-parsed profile that Perf2bolt is able to consume. Why does the test need a textual-format SPE profile? * To collect an Arm SPE profile with Linux Perf, an Arm development device with SPE support is needed. * To decode SPE data, a suitable version of Linux Perf is also needed. * The minimum required version of Linux Perf is v6.15. To bypass these technical difficulties, it is easier to provide a pre-generated textual profile. The generator relies on the aggregator to spawn the required perf-script jobs based on the aggregation type, and merges the results of the perf-script jobs into a single file. This hybrid profile will contain all required events such as BuildID, MMAP, TASK, BRSTACK, or MEM events for the aggregation. Two examples below show how to generate pre-parsed perf data as an input for Arm SPE aggregation: `perf2bolt -p perf.data BINARY -o perf.text --spe --generate-perf-script` Or for basic aggregation: `perf2bolt -p perf.data BINARY -o perf.text --ba --generate-perf-script`
This commit is contained in:
parent
f1d4dda804
commit
733bc3409b
@ -154,20 +154,33 @@ private:
|
||||
/// Perf utility full path name
|
||||
std::string PerfPath;
|
||||
|
||||
/// Identifies which perf job a PerfProcessInfo entry describes.
/// The enumerator values are used as indices into
/// PerfProcessInfo::EventNamesStr when a type is printed, so the enumerators
/// and that table must stay in the same order.
enum PerfProcessType {
|
||||
BUILDIDS = 0,
|
||||
MAIN_EVENTS,
|
||||
MEM_EVENTS,
|
||||
MMAP_EVENTS,
|
||||
TASK_EVENTS
|
||||
};
|
||||
friend raw_ostream &operator<<(raw_ostream &OS, const PerfProcessType &T);
|
||||
|
||||
/// Perf process spawning bookkeeping
|
||||
struct PerfProcessInfo {
|
||||
/// Printable event names, indexed by PerfProcessType. The stream operator
/// for PerfProcessType looks names up in this table, so the entries must
/// follow the enumerator order (BUILDIDS, MAIN_EVENTS, MEM_EVENTS,
/// MMAP_EVENTS, TASK_EVENTS).
static constexpr StringLiteral EventNamesStr[] = {"BUILDIDS", "MAIN", "MEM",
|
||||
"MMAP", "TASK"};
|
||||
|
||||
/// Which perf job this entry belongs to.
enum PerfProcessType Type;
|
||||
/// Starts out false.
bool IsFinished{false};
|
||||
// NOTE(review): PI, StdoutPath and StderrPath each appear twice below. This
// looks like both sides of the diff being shown (plain declarations replaced
// by brace-initialized ones) -- confirm against the actual header.
/// Handle of the spawned process; passed to sys::Wait by
/// generatePerfTextData().
sys::ProcessInfo PI;
|
||||
/// Path of the file holding the job's standard output; its contents are
/// merged into the pre-parsed profile by generatePerfTextData().
SmallVector<char, 256> StdoutPath;
|
||||
/// Path of the file associated with the job's standard error
/// (presumably its redirected stderr -- verify in launchPerfProcess).
SmallVector<char, 256> StderrPath;
|
||||
sys::ProcessInfo PI{};
|
||||
SmallVector<char, 256> StdoutPath{};
|
||||
SmallVector<char, 256> StderrPath{};
|
||||
};
|
||||
|
||||
/// Process info for spawned processes
|
||||
PerfProcessInfo BuildIDProcessInfo;
|
||||
PerfProcessInfo MainEventsPPI;
|
||||
PerfProcessInfo MemEventsPPI;
|
||||
PerfProcessInfo MMapEventsPPI;
|
||||
PerfProcessInfo TaskEventsPPI;
|
||||
PerfProcessInfo BuildIDProcessInfo = {PerfProcessType::BUILDIDS};
|
||||
PerfProcessInfo MainEventsPPI = {PerfProcessType::MAIN_EVENTS};
|
||||
PerfProcessInfo MemEventsPPI = {PerfProcessType::MEM_EVENTS};
|
||||
PerfProcessInfo MMapEventsPPI = {PerfProcessType::MMAP_EVENTS};
|
||||
PerfProcessInfo TaskEventsPPI = {PerfProcessType::TASK_EVENTS};
|
||||
|
||||
/// Kernel VM starts at fixed based address
|
||||
/// https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt
|
||||
@ -244,6 +257,9 @@ private:
|
||||
/// parsing.
|
||||
void launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, StringRef Args);
|
||||
|
||||
/// Return the size of \p File in bytes, or the error code if the size cannot
/// be obtained. Used when generating the pre-parsed perf text profile.
|
||||
ErrorOr<uint64_t> getFileSize(StringRef File);
|
||||
|
||||
/// Delete all temporary files created to hold the output generated by spawned
|
||||
/// subprocesses during the aggregation job
|
||||
void deleteTempFiles();
|
||||
@ -450,6 +466,51 @@ private:
|
||||
/// an external tool.
|
||||
std::error_code parsePreAggregatedLBRSamples();
|
||||
|
||||
/// Dump pre-parsed perf profile data into a single file.
|
||||
/// The generator relies on the aggregator work to spawn the required
|
||||
/// perf-script jobs based on the aggregation type, and merges
|
||||
/// their results into a single file.
|
||||
/// This hybrid profile contains all required events such as BuildID,
|
||||
/// MMAP, TASK, MAIN (brstack or basic samples), or MEM for the aggregation.
|
||||
/// The generator also creates a file header, where these events
|
||||
/// are listed along with the length information of their contents.
|
||||
/// The given length numbers in the header are in bytes, they are used
|
||||
/// as an offset in the pre-parsed profile.
|
||||
/// Some of these events are required to be present in the file.
|
||||
///
|
||||
/// Short description of supported events:
|
||||
/// MEM: Optional. Parsing memory profile is enabled by default, unless
|
||||
/// '--itrace' aggregation is set. In the latter case MEM profile
|
||||
/// won't be added into the pre-parsed profile. Note that, currently
|
||||
/// mem events are only supported if they were gathered on X86_64.
|
||||
/// MMAP: Compulsory, the mmap data is required to be in the file.
|
||||
/// BUILDID: Ignored when buildid information doesn't exist in the input
|
||||
/// profile. In that case, must use `--ignore-build-id`.
|
||||
/// TASK: If task related data exists in the input profile,
|
||||
/// Perf2bolt will always parse it.
|
||||
/// MAIN: Compulsory; the MAIN events always have to be represented in the
|
||||
/// file. Main events could be either 'brstack' or 'basic' sample data
|
||||
/// based on how it was collected by Linux Perf.
|
||||
///
|
||||
/// Example how you can generate pre-parsed profile for 'basic' aggregation:
|
||||
/// perf2bolt -p perf.data BINARY -o perf.text --ba --generate-perf-script
|
||||
///
|
||||
/// This is what pre-parsed profile data looks like for Basic Aggregation:
|
||||
/// PERFTEXT;BUILDIDS=32;MMAP=2DC6C0;MAIN=1388;TASK=55730;MEM=128;
|
||||
/// abcd1234 /example/bin1
|
||||
/// ...
|
||||
/// bin1 1234 ... PERF_RECORD_MMAP2 1234/1234: ... r-xp /example/bin1
|
||||
/// ...
|
||||
/// bin1 1234 ... PERF_RECORD_COMM exec: bin1:1234/1234
|
||||
/// bin1 1234 ... PERF_RECORD_EXIT(1234:1234):(20469:20469)
|
||||
/// ...
|
||||
/// 1234 branch: abcd1234 abcd1237
|
||||
/// 1234 branch: abcd5678 abce9876
|
||||
/// ...
|
||||
/// 1234 mem-loads: efgh1234 efgh1234
|
||||
/// 1234 mem-loads: efgh4567 efgh8910
|
||||
Error generatePerfTextData();
|
||||
|
||||
/// If \p Address falls into the binary address space based on memory
|
||||
/// mapping info \p MMI, then adjust it for further processing by subtracting
|
||||
/// the base load address. External addresses, i.e. addresses that do not
|
||||
@ -600,6 +661,12 @@ inline raw_ostream &operator<<(raw_ostream &OS,
|
||||
OS << " ... " << Twine::utohexstr(T.To);
|
||||
return OS;
|
||||
}
|
||||
|
||||
/// Print the textual name of perf process type \p T to \p OS. The name is
/// taken from DataAggregator::PerfProcessInfo::EventNamesStr, using the
/// enumerator value as the index.
inline raw_ostream &operator<<(raw_ostream &OS,
                               const DataAggregator::PerfProcessType &T) {
  return OS << DataAggregator::PerfProcessInfo::EventNamesStr[T];
}
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
|
||||
@ -127,6 +127,11 @@ cl::opt<std::string>
|
||||
"perf-script output in a textual format"),
|
||||
cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory));
|
||||
|
||||
// Hidden option "generate-perf-script". When it is set, preprocessProfile()
// calls generatePerfTextData(), which merges the output of the perf-script
// jobs into opts::OutputFilename, and then exits instead of aggregating.
cl::opt<bool> GeneratePerfTextProfile(
|
||||
"generate-perf-script",
|
||||
cl::desc("Dump perf-script jobs' output into a file"), cl::Hidden,
|
||||
cl::cat(AggregatorCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
TimeAggregator("time-aggr",
|
||||
cl::desc("time BOLT aggregator"),
|
||||
@ -141,6 +146,8 @@ namespace {
|
||||
const char TimerGroupName[] = "aggregator";
|
||||
const char TimerGroupDesc[] = "Aggregator";
|
||||
|
||||
// Magic string that opens the header line of a generated pre-parsed perf text
// profile (see generatePerfTextData()).
constexpr const StringLiteral PerfTextMagicStr = "PERFTEXT";
|
||||
|
||||
std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
|
||||
std::vector<SectionNameAndRange> sections;
|
||||
for (BinarySection &Section : BC->sections()) {
|
||||
@ -169,6 +176,15 @@ void deleteTempFile(const std::string &FileName) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up the size of \p File in bytes.
///
/// \returns the size on success. If the size cannot be queried, a diagnostic
/// is printed to errs() and the underlying error code is returned instead.
ErrorOr<uint64_t> DataAggregator::getFileSize(StringRef File) {
  uint64_t NumBytes;
  const std::error_code StatEC = sys::fs::file_size(File, NumBytes);
  if (!StatEC)
    return NumBytes;

  errs() << "unable to obtain file size: " << StatEC.message() << "\n";
  return StatEC;
}
|
||||
|
||||
void DataAggregator::deleteTempFiles() {
|
||||
for (std::string &FileName : TempFiles)
|
||||
deleteTempFile(FileName);
|
||||
@ -382,6 +398,65 @@ void DataAggregator::parsePreAggregated() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge the output of the spawned perf-script jobs into the single file
/// named by opts::OutputFilename.
///
/// The file starts with a one-line table of contents of the form
///   PERFTEXT;EVENT1=<size>;EVENT2=<size>;...
/// which is written over a space-padded placeholder line of fixed width. It is
/// followed by the raw stdout of every job, in the same order as the header
/// entries. Each size is the byte length of the corresponding job's stdout
/// file, printed in hexadecimal without a prefix.
///
/// \returns Error::success() once the profile has been written and the
/// temporary files have been deleted. On failure a diagnostic is printed to
/// errs() and an Error wrapping the failing error code is returned; the
/// temporary files are left for the caller to clean up.
Error DataAggregator::generatePerfTextData() {
  // Number of characters reserved at the start of the file for the header.
  // This is a pre-calculated upper bound covering the magic string, the event
  // names, their maximum sizes and the field separators.
  constexpr size_t HeaderReservedLen = 132;

  std::error_code OpenEC;
  raw_fd_ostream OutFile(opts::OutputFilename, OpenEC,
                         sys::fs::OpenFlags::OF_None);
  if (OpenEC) {
    errs() << "error opening output file: " << OpenEC.message() << "\n";
    return errorCodeToError(OpenEC);
  }

  // The header is back-patched with seek(0) once all sizes are known, so the
  // output has to be seekable (a pipe, for example, is not).
  if (!OutFile.supportsSeeking()) {
    errs() << "error: output file " << opts::OutputFilename
           << " does not support seeking\n";
    return errorCodeToError(make_error_code(llvm::errc::invalid_argument));
  }

  SmallVector<PerfProcessInfo *, 5> ProcessInfos = {
      &BuildIDProcessInfo, &MMapEventsPPI, &MainEventsPPI, &TaskEventsPPI};
  if (opts::ParseMemProfile)
    ProcessInfos.push_back(&MemEventsPPI);

  // Reserve room for the header; the real contents are filled in at the end.
  OutFile << std::string(HeaderReservedLen, ' ') << "\n";

  std::string Header;
  raw_string_ostream SS(Header);
  SS << PerfTextMagicStr << ";";

  for (const PerfProcessInfo *PPI : ProcessInfos) {
    const char *StdoutFile = PPI->StdoutPath.data();

    // Block until the job has finished so that its output file is complete.
    // NOTE(review): the exit code reported by sys::Wait is not inspected
    // here -- confirm whether a non-zero status of a compulsory job should
    // abort the generation.
    std::string WaitErrMsg;
    sys::Wait(PPI->PI, std::nullopt, &WaitErrMsg);
    if (!WaitErrMsg.empty()) {
      errs() << "PERF-ERROR: " << PerfPath << ": " << WaitErrMsg << "\n";
      return errorCodeToError(make_error_code(llvm::errc::no_child_process));
    }

    // Record "<EVENT>=<size in hex>;" for this job in the header.
    ErrorOr<uint64_t> JobOutputSize = getFileSize(StdoutFile);
    if (std::error_code SizeEC = JobOutputSize.getError())
      return errorCodeToError(SizeEC);
    SS << PPI->Type << formatv("={0:x-};", *JobOutputSize);

    // Append the job's output to the merged profile.
    ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
        MemoryBuffer::getFileOrSTDIN(StdoutFile);
    if (std::error_code ReadEC = MB.getError()) {
      errs() << "Cannot open " << StdoutFile << ": " << ReadEC.message()
             << "\n";
      return errorCodeToError(ReadEC);
    }
    OutFile << (*MB)->getBuffer();
  }

  // Never let the header spill over the reserved line into the event data.
  if (Header.size() > HeaderReservedLen) {
    errs() << "error: pre-parsed profile header exceeds the reserved "
           << HeaderReservedLen << " characters\n";
    return errorCodeToError(make_error_code(llvm::errc::result_out_of_range));
  }

  OutFile.seek(0);
  OutFile << Header;
  OutFile.close();

  // Report write/close failures as an Error. An uncleared stream error would
  // otherwise be turned into a fatal error when OutFile is destroyed.
  if (OutFile.has_error()) {
    const std::error_code WriteEC = OutFile.error();
    OutFile.clear_error();
    errs() << "error writing output file: " << WriteEC.message() << "\n";
    return errorCodeToError(WriteEC);
  }

  outs() << "PERF2BOLT: Profile is saved to file " << opts::OutputFilename
         << "\n";
  deleteTempFiles();
  return Error::success();
}
|
||||
|
||||
void DataAggregator::filterBinaryMMapInfo() {
|
||||
if (opts::FilterPID) {
|
||||
auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
|
||||
@ -594,7 +669,13 @@ void DataAggregator::imputeFallThroughs() {
|
||||
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
|
||||
this->BC = &BC;
|
||||
|
||||
if (opts::ReadPreAggregated) {
|
||||
if (opts::GeneratePerfTextProfile) {
|
||||
if (Error E = generatePerfTextData()) {
|
||||
deleteTempFiles();
|
||||
exit(1);
|
||||
}
|
||||
exit(0);
|
||||
} else if (opts::ReadPreAggregated) {
|
||||
parsePreAggregated();
|
||||
} else {
|
||||
parsePerfData(BC);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user