[BOLT][BAT] Add entries for deleted basic blocks

Deleted basic blocks are required for correct mapping of branches
modified by SCTC.

Increases BAT size, bytes:
- large binary: 8622496 -> 8703244.
- small binary (X86/bolt-address-translation.test): 928 -> 940.

Test Plan: updated bb-with-two-tail-calls.s

Reviewers: ayermolo, dcci, maksfb, rafaelauler

Reviewed By: rafaelauler

Pull Request: https://github.com/llvm/llvm-project/pull/91906
This commit is contained in:
Amir Ayupov 2024-05-23 19:19:07 -07:00 committed by GitHub
parent 29189738b8
commit d1d9545ed3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 39 additions and 11 deletions

View File

@ -106,9 +106,14 @@ equals output offset.
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
(branch or call instruction). If not set, it signifies a control flow target
(basic block offset).
`InputAddr` is omitted for equal offsets in input and output function. In this
case, `BRANCHENTRY` bits are encoded separately in a `BranchEntries` bitvector.
Deleted basic blocks are emitted as having `OutputOffset` equal to the size of
the function. They don't affect address translation and only participate in
input basic block mapping.
### Secondary Entry Points table
The table is emitted for hot fragments only. It contains `NumSecEntryPoints`
offsets denoting secondary entry points, delta encoded, implicitly starting at zero.

View File

@ -70,7 +70,7 @@ class BinaryFunction;
class BoltAddressTranslation {
public:
// In-memory representation of the address translation table
using MapTy = std::map<uint32_t, uint32_t>;
using MapTy = std::multimap<uint32_t, uint32_t>;
// List of taken fall-throughs
using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>;
@ -218,6 +218,7 @@ public:
auto begin() const { return Map.begin(); }
auto end() const { return Map.end(); }
auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
auto size() const { return Map.size(); }
};
/// Map function output address to its hash and basic blocks hash map.

View File

@ -54,7 +54,7 @@ void BoltAddressTranslation::writeEntriesForBB(
// and this deleted block will both share the same output address (the same
// key), and we need to map back. We choose here to privilege the successor by
// allowing it to overwrite the previously inserted key in the map.
Map[BBOutputOffset] = BBInputOffset << 1;
Map.emplace(BBOutputOffset, BBInputOffset << 1);
const auto &IOAddressMap =
BB.getFunction()->getBinaryContext().getIOAddressMap();
@ -71,8 +71,7 @@ void BoltAddressTranslation::writeEntriesForBB(
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
<< Twine::utohexstr(InputOffset) << " (branch)\n");
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
(InputOffset << 1) | BRANCHENTRY));
Map.emplace(OutputOffset, (InputOffset << 1) | BRANCHENTRY);
}
}
@ -107,6 +106,19 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
for (const BinaryBasicBlock *const BB :
Function.getLayout().getMainFragment())
writeEntriesForBB(Map, *BB, InputAddress, OutputAddress);
// Add entries for deleted blocks. They are still required for correct BB
// mapping of branches modified by SCTC. By convention, they would have the
// end of the function as output address.
const BBHashMapTy &BBHashMap = getBBHashMap(InputAddress);
if (BBHashMap.size() != Function.size()) {
const uint64_t EndOffset = Function.getOutputSize();
std::unordered_set<uint32_t> MappedInputOffsets;
for (const BinaryBasicBlock &BB : Function)
MappedInputOffsets.emplace(BB.getInputOffset());
for (const auto &[InputOffset, _] : BBHashMap)
if (!llvm::is_contained(MappedInputOffsets, InputOffset))
Map.emplace(EndOffset, InputOffset << 1);
}
Maps.emplace(Function.getOutputAddress(), std::move(Map));
ReverseMap.emplace(OutputAddress, InputAddress);

View File

@ -2349,11 +2349,11 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
BAT->getBBHashMap(FuncAddress);
YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);
for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks))
YamlBB.Index = Idx;
for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
YamlBF.Blocks[BI->second.Index].Hash = BI->second.Hash;
for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) {
const auto &Block = Entry.second;
YamlBB.Hash = Block.Hash;
YamlBB.Index = Block.Index;
}
// Lookup containing basic block offset and index
auto getBlock = [&BlockMap](uint32_t Offset) {

View File

@ -8,11 +8,21 @@
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --lite=0 --dyno-stats \
# RUN: --print-sctc --print-only=_start -enable-bat 2>&1 | FileCheck %s
# RUN: llvm-objdump --syms %t.out > %t.log
# RUN: llvm-bat-dump %t.out --dump-all >> %t.log
# RUN: FileCheck %s --input-file %t.log --check-prefix=CHECK-BAT
# CHECK-NOT: Assertion `BranchInfo.size() == 2 && "could only be called for blocks with 2 successors"' failed.
# Two tail calls in the same basic block after SCTC:
# CHECK: {{.*}}: ja {{.*}} # TAILCALL # Offset: 7 # CTCTakenCount: 4
# CHECK-NEXT: {{.*}}: jmp {{.*}} # TAILCALL # Offset: 13
# Confirm that a deleted basic block is emitted at function end offset (0xe)
# CHECK-BAT: [[#%x,ADDR:]] g .text [[#%x,SIZE:]] _start
# CHECK-BAT: Function Address: 0x[[#%x,ADDR]]
# CHECK-BAT: 0x[[#%x,SIZE]]
# CHECK-BAT: NumBlocks: 5
.globl _start
_start:
je x

View File

@ -41,7 +41,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s
WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 384
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 404
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries

View File

@ -37,7 +37,7 @@
# CHECK: BOLT: 3 out of 7 functions were overwritten.
# CHECK: BOLT-INFO: Wrote 6 BAT maps
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
# CHECK: BOLT-INFO: BAT section size (bytes): 928
# CHECK: BOLT-INFO: BAT section size (bytes): 940
#
# usqrt mappings (hot part). We match against any key (left side containing
# the bolted binary offsets) because BOLT may change where it puts instructions