[BOLT] Support pre-aggregated basic sample profile (#140196)
Define a pre-aggregated basic sample format:
```
E <event name>
S <location> <count>
```
The `-nl` flag is required to use parsed basic samples.

Test Plan: update pre-aggregated-perf.test
This commit is contained in:
parent
c4806dbda3
commit
18e51314c4
@ -370,33 +370,46 @@ private:
|
||||
/// memory.
|
||||
///
|
||||
/// File format syntax:
|
||||
/// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
|
||||
/// <count> [<mispred_count>]
|
||||
/// E <event>
|
||||
/// S <start> <count>
|
||||
/// T <start> <end> <ft_end> <count>
|
||||
/// B <start> <end> <count> <mispred_count>
|
||||
/// [Ff] <start> <end> <count>
|
||||
///
|
||||
/// B - indicates an aggregated branch
|
||||
/// F - an aggregated fall-through
|
||||
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
|
||||
///
|
||||
/// E - name of the sampling event used for subsequent entries
|
||||
/// S - indicates an aggregated basic sample at <start>
|
||||
/// B - indicates an aggregated branch from <start> to <end>
|
||||
/// F - an aggregated fall-through from <start> to <end>
|
||||
/// f - an aggregated fall-through with external origin - used to disambiguate
|
||||
/// between a return hitting a basic block head and a regular internal
|
||||
/// jump to the block
|
||||
/// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
|
||||
/// T - an aggregated trace: branch from <start> to <end> with a fall-through
|
||||
/// to <ft_end>
|
||||
///
|
||||
/// <start_id> - build id of the object containing the start address. We can
|
||||
/// skip it for the main binary and use "X" for an unknown object. This will
|
||||
/// save some space and facilitate human parsing.
|
||||
/// <id> - build id of the object containing the address. We can skip it for
|
||||
/// the main binary and use "X" for an unknown object. This will save some
|
||||
/// space and facilitate human parsing.
|
||||
///
|
||||
/// <start_offset> - hex offset from the object base load address (0 for the
|
||||
/// main executable unless it's PIE) to the start address.
|
||||
/// <offset> - hex offset from the object base load address (0 for the
|
||||
/// main executable unless it's PIE) to the address.
|
||||
///
|
||||
/// <end_id>, <end_offset> - same for the end address.
|
||||
///
|
||||
/// <ft_end> - same for the fallthrough_end address.
|
||||
///
|
||||
/// <count> - total aggregated count of the branch or a fall-through.
|
||||
/// <count> - total aggregated count.
|
||||
///
|
||||
/// <mispred_count> - the number of times the branch was mispredicted.
|
||||
/// Omitted for fall-throughs.
|
||||
///
|
||||
/// Example:
|
||||
/// Basic samples profile:
|
||||
/// E cycles
|
||||
/// S 41be50 3
|
||||
/// E br_inst_retired.near_taken
|
||||
/// S 41be60 6
|
||||
///
|
||||
/// Trace profile combining branches and fall-throughs:
|
||||
/// T 4b196f 4b19e0 4b19ef 2
|
||||
///
|
||||
/// Legacy branch profile with separate branches and fall-throughs:
|
||||
/// F 41be50 41be50 3
|
||||
/// F 41be90 41be90 4
|
||||
/// B 4b1942 39b57f0 3 0
|
||||
|
@ -1204,60 +1204,74 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
|
||||
}
|
||||
|
||||
std::error_code DataAggregator::parseAggregatedLBREntry() {
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
enum AggregatedLBREntry : char {
|
||||
INVALID = 0,
|
||||
EVENT_NAME, // E
|
||||
TRACE, // T
|
||||
SAMPLE, // S
|
||||
BRANCH, // B
|
||||
FT, // F
|
||||
FT_EXTERNAL_ORIGIN // f
|
||||
} Type = INVALID;
|
||||
|
||||
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
|
||||
if (std::error_code EC = TypeOrErr.getError())
|
||||
return EC;
|
||||
enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
|
||||
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
|
||||
.Case("T", TRACE)
|
||||
.Case("B", BRANCH)
|
||||
.Case("F", FT)
|
||||
.Case("f", FT_EXTERNAL_ORIGIN)
|
||||
.Default(INVALID);
|
||||
if (Type == INVALID) {
|
||||
reportError("expected T, B, F or f");
|
||||
return make_error_code(llvm::errc::io_error);
|
||||
}
|
||||
// The number of fields to parse, set based on Type.
|
||||
int AddrNum = 0;
|
||||
int CounterNum = 0;
|
||||
// Storage for parsed fields.
|
||||
StringRef EventName;
|
||||
std::optional<Location> Addr[3];
|
||||
int64_t Counters[2];
|
||||
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
ErrorOr<Location> From = parseLocationOrOffset();
|
||||
if (std::error_code EC = From.getError())
|
||||
return EC;
|
||||
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
ErrorOr<Location> To = parseLocationOrOffset();
|
||||
if (std::error_code EC = To.getError())
|
||||
return EC;
|
||||
|
||||
ErrorOr<Location> TraceFtEnd = std::error_code();
|
||||
if (Type == AggregatedLBREntry::TRACE) {
|
||||
while (Type == INVALID || Type == EVENT_NAME) {
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
TraceFtEnd = parseLocationOrOffset();
|
||||
if (std::error_code EC = TraceFtEnd.getError())
|
||||
ErrorOr<StringRef> StrOrErr =
|
||||
parseString(FieldSeparator, Type == EVENT_NAME);
|
||||
if (std::error_code EC = StrOrErr.getError())
|
||||
return EC;
|
||||
StringRef Str = StrOrErr.get();
|
||||
|
||||
if (Type == EVENT_NAME) {
|
||||
EventName = Str;
|
||||
break;
|
||||
}
|
||||
|
||||
Type = StringSwitch<AggregatedLBREntry>(Str)
|
||||
.Case("T", TRACE)
|
||||
.Case("S", SAMPLE)
|
||||
.Case("E", EVENT_NAME)
|
||||
.Case("B", BRANCH)
|
||||
.Case("F", FT)
|
||||
.Case("f", FT_EXTERNAL_ORIGIN)
|
||||
.Default(INVALID);
|
||||
|
||||
if (Type == INVALID) {
|
||||
reportError("expected T, S, E, B, F or f");
|
||||
return make_error_code(llvm::errc::io_error);
|
||||
}
|
||||
|
||||
using SSI = StringSwitch<int>;
|
||||
AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2);
|
||||
CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1);
|
||||
}
|
||||
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
ErrorOr<int64_t> Frequency =
|
||||
parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
|
||||
if (std::error_code EC = Frequency.getError())
|
||||
return EC;
|
||||
|
||||
uint64_t Mispreds = 0;
|
||||
if (Type == AggregatedLBREntry::BRANCH) {
|
||||
for (int I = 0; I < AddrNum; ++I) {
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
|
||||
if (std::error_code EC = MispredsOrErr.getError())
|
||||
ErrorOr<Location> AddrOrErr = parseLocationOrOffset();
|
||||
if (std::error_code EC = AddrOrErr.getError())
|
||||
return EC;
|
||||
Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
|
||||
Addr[I] = AddrOrErr.get();
|
||||
}
|
||||
|
||||
for (int I = 0; I < CounterNum; ++I) {
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
ErrorOr<int64_t> CountOrErr =
|
||||
parseNumberField(FieldSeparator, I + 1 == CounterNum);
|
||||
if (std::error_code EC = CountOrErr.getError())
|
||||
return EC;
|
||||
Counters[I] = CountOrErr.get();
|
||||
}
|
||||
|
||||
if (!checkAndConsumeNewLine()) {
|
||||
@ -1265,16 +1279,31 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
|
||||
return make_error_code(llvm::errc::io_error);
|
||||
}
|
||||
|
||||
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
|
||||
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
|
||||
if (Type == EVENT_NAME) {
|
||||
EventNames.insert(EventName);
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
for (BinaryFunction *BF : {FromFunc, ToFunc})
|
||||
if (BF)
|
||||
BF->setHasProfileAvailable();
|
||||
const uint64_t FromOffset = Addr[0]->Offset;
|
||||
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
|
||||
if (FromFunc)
|
||||
FromFunc->setHasProfileAvailable();
|
||||
|
||||
uint64_t Count = static_cast<uint64_t>(Frequency.get());
|
||||
int64_t Count = Counters[0];
|
||||
int64_t Mispreds = Counters[1];
|
||||
|
||||
Trace Trace(From->Offset, To->Offset);
|
||||
if (Type == SAMPLE) {
|
||||
BasicSamples[FromOffset] += Count;
|
||||
NumTotalSamples += Count;
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
const uint64_t ToOffset = Addr[1]->Offset;
|
||||
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
|
||||
if (ToFunc)
|
||||
ToFunc->setHasProfileAvailable();
|
||||
|
||||
Trace Trace(FromOffset, ToOffset);
|
||||
// Taken trace
|
||||
if (Type == TRACE || Type == BRANCH) {
|
||||
TakenBranchInfo &Info = BranchLBRs[Trace];
|
||||
@ -1285,8 +1314,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
|
||||
}
|
||||
// Construct fallthrough part of the trace
|
||||
if (Type == TRACE) {
|
||||
Trace.From = To->Offset;
|
||||
Trace.To = TraceFtEnd->Offset;
|
||||
const uint64_t TraceFtEndOffset = Addr[2]->Offset;
|
||||
Trace.From = ToOffset;
|
||||
Trace.To = TraceFtEndOffset;
|
||||
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
|
||||
}
|
||||
// Add fallthrough trace
|
||||
|
19
bolt/test/X86/Inputs/pre-aggregated-basic.txt
Normal file
19
bolt/test/X86/Inputs/pre-aggregated-basic.txt
Normal file
@ -0,0 +1,19 @@
|
||||
E cycles
|
||||
S 4005f0 1
|
||||
S 4005f0 1
|
||||
S 400610 1
|
||||
S 400ad1 2
|
||||
S 400b10 1
|
||||
S 400bb7 1
|
||||
S 400bbc 2
|
||||
S 400d90 1
|
||||
S 400dae 1
|
||||
S 400e00 2
|
||||
S 401170 22
|
||||
S 401180 58
|
||||
S 4011a0 33
|
||||
S 4011a9 33
|
||||
S 4011ad 58
|
||||
S 4011b2 22
|
||||
S X:7f36d18d60c0 2
|
||||
S X:7f36d18f2ce0 1
|
@ -57,6 +57,16 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \
|
||||
RUN: --aggregate-only --profile-format=yaml --profile-use-dfs
|
||||
RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
|
||||
|
||||
## Test pre-aggregated basic profile
|
||||
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
|
||||
RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
|
||||
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba.nl \
|
||||
RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
|
||||
RUN: FileCheck %s --input-file %t.ba.nl --check-prefix CHECK-BASIC-NL
|
||||
BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
|
||||
BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
|
||||
CHECK-BASIC-NL: no_lbr cycles
|
||||
|
||||
PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
|
||||
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
|
||||
PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1
|
||||
|
@ -36,9 +36,9 @@ prefix_pat = re.compile(f"^# {args.prefix}: (.*)")
|
||||
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")
|
||||
|
||||
# Pre-aggregated profile:
|
||||
# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
|
||||
# <count> [<mispred_count>]
|
||||
preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)")
|
||||
# {T|S|E|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
|
||||
# <loc>: [<id>:]<offset>
|
||||
preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
|
||||
|
||||
# No-LBR profile:
|
||||
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
|
||||
|
Loading…
x
Reference in New Issue
Block a user