From 18e51314c41ea3093f28659cd15095778dfe88f7 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 2 Jun 2025 11:43:48 -0700 Subject: [PATCH] [BOLT] Support pre-aggregated basic sample profile (#140196) Define a pre-aggregated basic sample format: ``` E S ``` `-nl` flag is required to use parsed basic samples. Test Plan: update pre-aggregated-perf.test --- bolt/include/bolt/Profile/DataAggregator.h | 45 +++--- bolt/lib/Profile/DataAggregator.cpp | 136 +++++++++++------- bolt/test/X86/Inputs/pre-aggregated-basic.txt | 19 +++ bolt/test/X86/pre-aggregated-perf.test | 10 ++ bolt/test/link_fdata.py | 6 +- 5 files changed, 144 insertions(+), 72 deletions(-) create mode 100644 bolt/test/X86/Inputs/pre-aggregated-basic.txt diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 6d918134137d..cb8e81b829a0 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -370,33 +370,46 @@ private: /// memory. /// /// File format syntax: - /// {B|F|f|T} [:] [:] [] - /// [] + /// E + /// S + /// T + /// B + /// [Ff] /// - /// B - indicates an aggregated branch - /// F - an aggregated fall-through + /// where , , have the format [:] + /// + /// E - name of the sampling event used for subsequent entries + /// S - indicates an aggregated basic sample at + /// B - indicates an aggregated branch from to + /// F - an aggregated fall-through from to /// f - an aggregated fall-through with external origin - used to disambiguate /// between a return hitting a basic block head and a regular internal /// jump to the block - /// T - an aggregated trace: branch with a fall-through (from, to, ft_end) + /// T - an aggregated trace: branch from to with a fall-through + /// to /// - /// - build id of the object containing the start address. We can - /// skip it for the main binary and use "X" for an unknown object. This will - /// save some space and facilitate human parsing. + /// - build id of the object containing the address. We can skip it for + /// the main binary and use "X" for an unknown object. This will save some + /// space and facilitate human parsing. /// - /// - hex offset from the object base load address (0 for the - /// main executable unless it's PIE) to the start address. + /// - hex offset from the object base load address (0 for the + /// main executable unless it's PIE) to the address. /// - /// , - same for the end address. - /// - /// - same for the fallthrough_end address. - /// - /// - total aggregated count of the branch or a fall-through. + /// - total aggregated count. /// /// - the number of times the branch was mispredicted. - /// Omitted for fall-throughs. /// /// Example: + /// Basic samples profile: + /// E cycles + /// S 41be50 3 + /// E br_inst_retired.near_taken + /// S 41be60 6 + /// + /// Trace profile combining branches and fall-throughs: + /// T 4b196f 4b19e0 4b19ef 2 + /// + /// Legacy branch profile with separate branches and fall-throughs: /// F 41be50 41be50 3 /// F 41be90 41be90 4 /// B 4b1942 39b57f0 3 0 diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index d0620b64cad2..2527b5bfe38d 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -1204,60 +1204,74 @@ ErrorOr DataAggregator::parseLocationOrOffset() { } std::error_code DataAggregator::parseAggregatedLBREntry() { - while (checkAndConsumeFS()) { - } + enum AggregatedLBREntry : char { + INVALID = 0, + EVENT_NAME, // E + TRACE, // T + SAMPLE, // S + BRANCH, // B + FT, // F + FT_EXTERNAL_ORIGIN // f + } Type = INVALID; - ErrorOr TypeOrErr = parseString(FieldSeparator); - if (std::error_code EC = TypeOrErr.getError()) - return EC; - enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID }; - auto Type = StringSwitch(TypeOrErr.get()) - .Case("T", TRACE) - .Case("B", BRANCH) - .Case("F", FT) - .Case("f", FT_EXTERNAL_ORIGIN) - .Default(INVALID); - if (Type == INVALID) { - reportError("expected T, B, F or f"); - return make_error_code(llvm::errc::io_error); - } + // The number of fields to parse, set based on Type. + int AddrNum = 0; + int CounterNum = 0; + // Storage for parsed fields. + StringRef EventName; + std::optional Addr[3]; + int64_t Counters[2]; - while (checkAndConsumeFS()) { - } - ErrorOr From = parseLocationOrOffset(); - if (std::error_code EC = From.getError()) - return EC; - - while (checkAndConsumeFS()) { - } - ErrorOr To = parseLocationOrOffset(); - if (std::error_code EC = To.getError()) - return EC; - - ErrorOr TraceFtEnd = std::error_code(); - if (Type == AggregatedLBREntry::TRACE) { + while (Type == INVALID || Type == EVENT_NAME) { while (checkAndConsumeFS()) { } - TraceFtEnd = parseLocationOrOffset(); - if (std::error_code EC = TraceFtEnd.getError()) + ErrorOr StrOrErr = + parseString(FieldSeparator, Type == EVENT_NAME); + if (std::error_code EC = StrOrErr.getError()) return EC; + StringRef Str = StrOrErr.get(); + + if (Type == EVENT_NAME) { + EventName = Str; + break; + } + + Type = StringSwitch(Str) + .Case("T", TRACE) + .Case("S", SAMPLE) + .Case("E", EVENT_NAME) + .Case("B", BRANCH) + .Case("F", FT) + .Case("f", FT_EXTERNAL_ORIGIN) + .Default(INVALID); + + if (Type == INVALID) { + reportError("expected T, S, E, B, F or f"); + return make_error_code(llvm::errc::io_error); + } + + using SSI = StringSwitch; + AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2); + CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1); } - while (checkAndConsumeFS()) { - } - ErrorOr Frequency = - parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); - if (std::error_code EC = Frequency.getError()) - return EC; - - uint64_t Mispreds = 0; - if (Type == AggregatedLBREntry::BRANCH) { + for (int I = 0; I < AddrNum; ++I) { while (checkAndConsumeFS()) { } - ErrorOr MispredsOrErr = parseNumberField(FieldSeparator, true); - if (std::error_code EC = MispredsOrErr.getError()) + ErrorOr AddrOrErr = parseLocationOrOffset(); + if (std::error_code EC = AddrOrErr.getError()) return EC; - Mispreds = static_cast(MispredsOrErr.get()); + Addr[I] = AddrOrErr.get(); + } + + for (int I = 0; I < CounterNum; ++I) { + while (checkAndConsumeFS()) { + } + ErrorOr CountOrErr = + parseNumberField(FieldSeparator, I + 1 == CounterNum); + if (std::error_code EC = CountOrErr.getError()) + return EC; + Counters[I] = CountOrErr.get(); } if (!checkAndConsumeNewLine()) { @@ -1265,16 +1279,31 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { return make_error_code(llvm::errc::io_error); } - BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset); - BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset); + if (Type == EVENT_NAME) { + EventNames.insert(EventName); + return std::error_code(); + } - for (BinaryFunction *BF : {FromFunc, ToFunc}) - if (BF) - BF->setHasProfileAvailable(); + const uint64_t FromOffset = Addr[0]->Offset; + BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset); + if (FromFunc) + FromFunc->setHasProfileAvailable(); - uint64_t Count = static_cast(Frequency.get()); + int64_t Count = Counters[0]; + int64_t Mispreds = Counters[1]; - Trace Trace(From->Offset, To->Offset); + if (Type == SAMPLE) { + BasicSamples[FromOffset] += Count; + NumTotalSamples += Count; + return std::error_code(); + } + + const uint64_t ToOffset = Addr[1]->Offset; + BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset); + if (ToFunc) + ToFunc->setHasProfileAvailable(); + + Trace Trace(FromOffset, ToOffset); // Taken trace if (Type == TRACE || Type == BRANCH) { TakenBranchInfo &Info = BranchLBRs[Trace]; @@ -1285,8 +1314,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { } // Construct fallthrough part of the trace if (Type == TRACE) { - Trace.From = To->Offset; - Trace.To = TraceFtEnd->Offset; + const uint64_t TraceFtEndOffset = Addr[2]->Offset; + Trace.From = ToOffset; + Trace.To = TraceFtEndOffset; Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN; } // Add fallthrough trace diff --git a/bolt/test/X86/Inputs/pre-aggregated-basic.txt b/bolt/test/X86/Inputs/pre-aggregated-basic.txt new file mode 100644 index 000000000000..dcb85a1d4e44 --- /dev/null +++ b/bolt/test/X86/Inputs/pre-aggregated-basic.txt @@ -0,0 +1,19 @@ +E cycles +S 4005f0 1 +S 4005f0 1 +S 400610 1 +S 400ad1 2 +S 400b10 1 +S 400bb7 1 +S 400bbc 2 +S 400d90 1 +S 400dae 1 +S 400e00 2 +S 401170 22 +S 401180 58 +S 4011a0 33 +S 4011a9 33 +S 4011ad 58 +S 4011b2 22 +S X:7f36d18d60c0 2 +S X:7f36d18f2ce0 1 diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index c05a06bf7494..92e093c238e0 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -57,6 +57,16 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \ RUN: --aggregate-only --profile-format=yaml --profile-use-dfs RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT +## Test pre-aggregated basic profile +RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \ +RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR +RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba.nl \ +RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS +RUN: FileCheck %s --input-file %t.ba.nl --check-prefix CHECK-BASIC-NL +BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile +BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile +CHECK-BASIC-NL: no_lbr cycles + PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2 PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2 PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1 diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py index b6358fae1b8d..5a9752068bb9 100755 --- a/bolt/test/link_fdata.py +++ b/bolt/test/link_fdata.py @@ -36,9 +36,9 @@ prefix_pat = re.compile(f"^# {args.prefix}: (.*)") fdata_pat = re.compile(r"([01].*) (?P\d+) (?P\d+)") # Pre-aggregated profile: -# {T|B|F|f} [:] [:] [] -# [] -preagg_pat = re.compile(r"(?P[TBFf]) (?P.*)") +# {T|S|E|B|F|f} [] [] [] +# : [:] +preagg_pat = re.compile(r"(?P[TSBFf]) (?P.*)") # No-LBR profile: #