[BOLT][BAT] Add entries for deleted basic blocks
Deleted basic blocks are required for correct mapping of branches modified by SCTC. Increases BAT size, bytes: - large binary: 8622496 -> 8703244. - small binary (X86/bolt-address-translation.test): 928 -> 940. Test Plan: updated bb-with-two-tail-calls.s Reviewers: ayermolo, dcci, maksfb, rafaelauler Reviewed By: rafaelauler Pull Request: https://github.com/llvm/llvm-project/pull/91906
This commit is contained in:
parent
29189738b8
commit
d1d9545ed3
@ -106,9 +106,14 @@ equals output offset.
|
||||
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
|
||||
(branch or call instruction). If not set, it signifies a control flow target
|
||||
(basic block offset).
|
||||
|
||||
`InputAddr` is omitted for equal offsets in input and output function. In this
|
||||
case, `BRANCHENTRY` bits are encoded separately in a `BranchEntries` bitvector.
|
||||
|
||||
Deleted basic blocks are emitted as having `OutputOffset` equal to the size of
|
||||
the function. They don't affect address translation and only participate in
|
||||
input basic block mapping.
|
||||
|
||||
### Secondary Entry Points table
|
||||
The table is emitted for hot fragments only. It contains `NumSecEntryPoints`
|
||||
offsets denoting secondary entry points, delta encoded, implicitly starting at zero.
|
||||
|
||||
@ -70,7 +70,7 @@ class BinaryFunction;
|
||||
class BoltAddressTranslation {
|
||||
public:
|
||||
// In-memory representation of the address translation table
|
||||
using MapTy = std::map<uint32_t, uint32_t>;
|
||||
using MapTy = std::multimap<uint32_t, uint32_t>;
|
||||
|
||||
// List of taken fall-throughs
|
||||
using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>;
|
||||
@ -218,6 +218,7 @@ public:
|
||||
auto begin() const { return Map.begin(); }
|
||||
auto end() const { return Map.end(); }
|
||||
auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
|
||||
auto size() const { return Map.size(); }
|
||||
};
|
||||
|
||||
/// Map function output address to its hash and basic blocks hash map.
|
||||
|
||||
@ -54,7 +54,7 @@ void BoltAddressTranslation::writeEntriesForBB(
|
||||
// and this deleted block will both share the same output address (the same
|
||||
// key), and we need to map back. We choose here to privilege the successor by
|
||||
// allowing it to overwrite the previously inserted key in the map.
|
||||
Map[BBOutputOffset] = BBInputOffset << 1;
|
||||
Map.emplace(BBOutputOffset, BBInputOffset << 1);
|
||||
|
||||
const auto &IOAddressMap =
|
||||
BB.getFunction()->getBinaryContext().getIOAddressMap();
|
||||
@ -71,8 +71,7 @@ void BoltAddressTranslation::writeEntriesForBB(
|
||||
|
||||
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
|
||||
<< Twine::utohexstr(InputOffset) << " (branch)\n");
|
||||
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
|
||||
(InputOffset << 1) | BRANCHENTRY));
|
||||
Map.emplace(OutputOffset, (InputOffset << 1) | BRANCHENTRY);
|
||||
}
|
||||
}
|
||||
|
||||
@ -107,6 +106,19 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
|
||||
for (const BinaryBasicBlock *const BB :
|
||||
Function.getLayout().getMainFragment())
|
||||
writeEntriesForBB(Map, *BB, InputAddress, OutputAddress);
|
||||
// Add entries for deleted blocks. They are still required for correct BB
|
||||
// mapping of branches modified by SCTC. By convention, they would have the
|
||||
// end of the function as output address.
|
||||
const BBHashMapTy &BBHashMap = getBBHashMap(InputAddress);
|
||||
if (BBHashMap.size() != Function.size()) {
|
||||
const uint64_t EndOffset = Function.getOutputSize();
|
||||
std::unordered_set<uint32_t> MappedInputOffsets;
|
||||
for (const BinaryBasicBlock &BB : Function)
|
||||
MappedInputOffsets.emplace(BB.getInputOffset());
|
||||
for (const auto &[InputOffset, _] : BBHashMap)
|
||||
if (!llvm::is_contained(MappedInputOffsets, InputOffset))
|
||||
Map.emplace(EndOffset, InputOffset << 1);
|
||||
}
|
||||
Maps.emplace(Function.getOutputAddress(), std::move(Map));
|
||||
ReverseMap.emplace(OutputAddress, InputAddress);
|
||||
|
||||
|
||||
@ -2349,11 +2349,11 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
|
||||
BAT->getBBHashMap(FuncAddress);
|
||||
YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);
|
||||
|
||||
for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks))
|
||||
YamlBB.Index = Idx;
|
||||
|
||||
for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
|
||||
YamlBF.Blocks[BI->second.Index].Hash = BI->second.Hash;
|
||||
for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) {
|
||||
const auto &Block = Entry.second;
|
||||
YamlBB.Hash = Block.Hash;
|
||||
YamlBB.Index = Block.Index;
|
||||
}
|
||||
|
||||
// Lookup containing basic block offset and index
|
||||
auto getBlock = [&BlockMap](uint32_t Offset) {
|
||||
|
||||
@ -8,11 +8,21 @@
|
||||
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
|
||||
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --lite=0 --dyno-stats \
|
||||
# RUN: --print-sctc --print-only=_start -enable-bat 2>&1 | FileCheck %s
|
||||
# RUN: llvm-objdump --syms %t.out > %t.log
|
||||
# RUN: llvm-bat-dump %t.out --dump-all >> %t.log
|
||||
# RUN: FileCheck %s --input-file %t.log --check-prefix=CHECK-BAT
|
||||
|
||||
# CHECK-NOT: Assertion `BranchInfo.size() == 2 && "could only be called for blocks with 2 successors"' failed.
|
||||
# Two tail calls in the same basic block after SCTC:
|
||||
# CHECK: {{.*}}: ja {{.*}} # TAILCALL # Offset: 7 # CTCTakenCount: 4
|
||||
# CHECK-NEXT: {{.*}}: jmp {{.*}} # TAILCALL # Offset: 13
|
||||
|
||||
# Confirm that a deleted basic block is emitted at function end offset (0xe)
|
||||
# CHECK-BAT: [[#%x,ADDR:]] g .text [[#%x,SIZE:]] _start
|
||||
# CHECK-BAT: Function Address: 0x[[#%x,ADDR]]
|
||||
# CHECK-BAT: 0x[[#%x,SIZE]]
|
||||
# CHECK-BAT: NumBlocks: 5
|
||||
|
||||
.globl _start
|
||||
_start:
|
||||
je x
|
||||
|
||||
@ -41,7 +41,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s
|
||||
|
||||
WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
|
||||
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
|
||||
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 384
|
||||
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 404
|
||||
|
||||
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
|
||||
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries
|
||||
|
||||
@ -37,7 +37,7 @@
|
||||
# CHECK: BOLT: 3 out of 7 functions were overwritten.
|
||||
# CHECK: BOLT-INFO: Wrote 6 BAT maps
|
||||
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
|
||||
# CHECK: BOLT-INFO: BAT section size (bytes): 928
|
||||
# CHECK: BOLT-INFO: BAT section size (bytes): 940
|
||||
#
|
||||
# usqrt mappings (hot part). We match against any key (left side containing
|
||||
# the bolted binary offsets) because BOLT may change where it puts instructions
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user