[BOLT] Support pre-aggregated basic sample profile (#140196)

Define a pre-aggregated basic sample format:
```
E <event name>
S <location> <count>
```

The `-nl` flag is required in order to use the parsed basic samples.

Test Plan: update pre-aggregated-perf.test
This commit is contained in:
Amir Ayupov 2025-06-02 11:43:48 -07:00 committed by GitHub
parent c4806dbda3
commit 18e51314c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 144 additions and 72 deletions

View File

@ -370,33 +370,46 @@ private:
/// memory.
///
/// File format syntax:
/// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
/// <count> [<mispred_count>]
/// E <event>
/// S <start> <count>
/// T <start> <end> <ft_end> <count>
/// B <start> <end> <count> <mispred_count>
/// [Ff] <start> <end> <count>
///
/// B - indicates an aggregated branch
/// F - an aggregated fall-through
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
///
/// E - name of the sampling event used for subsequent entries
/// S - indicates an aggregated basic sample at <start>
/// B - indicates an aggregated branch from <start> to <end>
/// F - an aggregated fall-through from <start> to <end>
/// f - an aggregated fall-through with external origin - used to disambiguate
/// between a return hitting a basic block head and a regular internal
/// jump to the block
/// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
/// T - an aggregated trace: branch from <start> to <end> with a fall-through
/// to <ft_end>
///
/// <start_id> - build id of the object containing the start address. We can
/// skip it for the main binary and use "X" for an unknown object. This will
/// save some space and facilitate human parsing.
/// <id> - build id of the object containing the address. We can skip it for
/// the main binary and use "X" for an unknown object. This will save some
/// space and facilitate human parsing.
///
/// <start_offset> - hex offset from the object base load address (0 for the
/// main executable unless it's PIE) to the start address.
/// <offset> - hex offset from the object base load address (0 for the
/// main executable unless it's PIE) to the address.
///
/// <end_id>, <end_offset> - same for the end address.
///
/// <ft_end> - same for the fallthrough_end address.
///
/// <count> - total aggregated count of the branch or a fall-through.
/// <count> - total aggregated count.
///
/// <mispred_count> - the number of times the branch was mispredicted.
/// Present only for B entries; omitted for all other entry types.
///
/// Example:
/// Basic samples profile:
/// E cycles
/// S 41be50 3
/// E br_inst_retired.near_taken
/// S 41be60 6
///
/// Trace profile combining branches and fall-throughs:
/// T 4b196f 4b19e0 4b19ef 2
///
/// Legacy branch profile with separate branches and fall-throughs:
/// F 41be50 41be50 3
/// F 41be90 41be90 4
/// B 4b1942 39b57f0 3 0

View File

@ -1204,60 +1204,74 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
}
std::error_code DataAggregator::parseAggregatedLBREntry() {
while (checkAndConsumeFS()) {
}
enum AggregatedLBREntry : char {
INVALID = 0,
EVENT_NAME, // E
TRACE, // T
SAMPLE, // S
BRANCH, // B
FT, // F
FT_EXTERNAL_ORIGIN // f
} Type = INVALID;
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
.Case("T", TRACE)
.Case("B", BRANCH)
.Case("F", FT)
.Case("f", FT_EXTERNAL_ORIGIN)
.Default(INVALID);
if (Type == INVALID) {
reportError("expected T, B, F or f");
return make_error_code(llvm::errc::io_error);
}
// The number of fields to parse, set based on Type.
int AddrNum = 0;
int CounterNum = 0;
// Storage for parsed fields.
StringRef EventName;
std::optional<Location> Addr[3];
int64_t Counters[2];
while (checkAndConsumeFS()) {
}
ErrorOr<Location> From = parseLocationOrOffset();
if (std::error_code EC = From.getError())
return EC;
while (checkAndConsumeFS()) {
}
ErrorOr<Location> To = parseLocationOrOffset();
if (std::error_code EC = To.getError())
return EC;
ErrorOr<Location> TraceFtEnd = std::error_code();
if (Type == AggregatedLBREntry::TRACE) {
while (Type == INVALID || Type == EVENT_NAME) {
while (checkAndConsumeFS()) {
}
TraceFtEnd = parseLocationOrOffset();
if (std::error_code EC = TraceFtEnd.getError())
ErrorOr<StringRef> StrOrErr =
parseString(FieldSeparator, Type == EVENT_NAME);
if (std::error_code EC = StrOrErr.getError())
return EC;
StringRef Str = StrOrErr.get();
if (Type == EVENT_NAME) {
EventName = Str;
break;
}
Type = StringSwitch<AggregatedLBREntry>(Str)
.Case("T", TRACE)
.Case("S", SAMPLE)
.Case("E", EVENT_NAME)
.Case("B", BRANCH)
.Case("F", FT)
.Case("f", FT_EXTERNAL_ORIGIN)
.Default(INVALID);
if (Type == INVALID) {
reportError("expected T, S, E, B, F or f");
return make_error_code(llvm::errc::io_error);
}
using SSI = StringSwitch<int>;
AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2);
CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1);
}
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> Frequency =
parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
if (std::error_code EC = Frequency.getError())
return EC;
uint64_t Mispreds = 0;
if (Type == AggregatedLBREntry::BRANCH) {
for (int I = 0; I < AddrNum; ++I) {
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
if (std::error_code EC = MispredsOrErr.getError())
ErrorOr<Location> AddrOrErr = parseLocationOrOffset();
if (std::error_code EC = AddrOrErr.getError())
return EC;
Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
Addr[I] = AddrOrErr.get();
}
for (int I = 0; I < CounterNum; ++I) {
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> CountOrErr =
parseNumberField(FieldSeparator, I + 1 == CounterNum);
if (std::error_code EC = CountOrErr.getError())
return EC;
Counters[I] = CountOrErr.get();
}
if (!checkAndConsumeNewLine()) {
@ -1265,16 +1279,31 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
return make_error_code(llvm::errc::io_error);
}
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
if (Type == EVENT_NAME) {
EventNames.insert(EventName);
return std::error_code();
}
for (BinaryFunction *BF : {FromFunc, ToFunc})
if (BF)
BF->setHasProfileAvailable();
const uint64_t FromOffset = Addr[0]->Offset;
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
if (FromFunc)
FromFunc->setHasProfileAvailable();
uint64_t Count = static_cast<uint64_t>(Frequency.get());
int64_t Count = Counters[0];
int64_t Mispreds = Counters[1];
Trace Trace(From->Offset, To->Offset);
if (Type == SAMPLE) {
BasicSamples[FromOffset] += Count;
NumTotalSamples += Count;
return std::error_code();
}
const uint64_t ToOffset = Addr[1]->Offset;
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
if (ToFunc)
ToFunc->setHasProfileAvailable();
Trace Trace(FromOffset, ToOffset);
// Taken trace
if (Type == TRACE || Type == BRANCH) {
TakenBranchInfo &Info = BranchLBRs[Trace];
@ -1285,8 +1314,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
}
// Construct fallthrough part of the trace
if (Type == TRACE) {
Trace.From = To->Offset;
Trace.To = TraceFtEnd->Offset;
const uint64_t TraceFtEndOffset = Addr[2]->Offset;
Trace.From = ToOffset;
Trace.To = TraceFtEndOffset;
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
}
// Add fallthrough trace

View File

@ -0,0 +1,19 @@
E cycles
S 4005f0 1
S 4005f0 1
S 400610 1
S 400ad1 2
S 400b10 1
S 400bb7 1
S 400bbc 2
S 400d90 1
S 400dae 1
S 400e00 2
S 401170 22
S 401180 58
S 4011a0 33
S 4011a9 33
S 4011ad 58
S 4011b2 22
S X:7f36d18d60c0 2
S X:7f36d18f2ce0 1

View File

@ -57,6 +57,16 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \
RUN: --aggregate-only --profile-format=yaml --profile-use-dfs
RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
## Test pre-aggregated basic profile
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba.nl \
RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
RUN: FileCheck %s --input-file %t.ba.nl --check-prefix CHECK-BASIC-NL
BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK-BASIC-NL: no_lbr cycles
PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1

View File

@ -36,9 +36,9 @@ prefix_pat = re.compile(f"^# {args.prefix}: (.*)")
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")
# Pre-aggregated profile:
# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
# <count> [<mispred_count>]
preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)")
# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
# E <event>
# where <start>, <end>, <ft_end> have the format [<id>:]<offset>
preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>