[BOLT] Validate extra entry point by querying data marker symbols (#154611)

Look up marker symbols and decide whether candidate is really
extra entry point in `adjustFunctionBoundaries()`.
This commit is contained in:
YongKang Zhu 2025-08-20 14:18:56 -07:00 committed by GitHub
parent 5683baea6d
commit 5c4f506cca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 63 additions and 38 deletions

View File

@ -1196,11 +1196,6 @@ public:
return getSecondaryEntryPointSymbol(BB.getLabel());
}
/// Remove a label from the secondary entry point map.
void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
SecondaryEntryPoints.erase(Label);
}
/// Return true if the basic block is an entry point into the function
/// (either primary or secondary).
bool isEntryPoint(const BinaryBasicBlock &BB) const {

View File

@ -241,7 +241,7 @@ private:
/// Adjust function sizes and set proper maximum size values after the whole
/// symbol table has been processed.
void adjustFunctionBoundaries();
void adjustFunctionBoundaries(DenseMap<uint64_t, MarkerSymType> &MarkerSyms);
/// Make .eh_frame section relocatable.
void relocateEHFrameSection();

View File

@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() {
continue;
// If we have grabbed a wrong code label which actually points to some
// constant island inside the function, ignore this label and remove it
// from the secondary entry point map.
if (isStartOfConstantIsland(Offset)) {
BC.SymbolToFunctionMap.erase(Label);
removeSymbolFromSecondaryEntryPointMap(Label);
// constant island inside the function, ignore this label.
if (isStartOfConstantIsland(Offset))
continue;
}
BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
"detected in function "

View File

@ -881,14 +881,9 @@ void RewriteInstance::discoverFileObjects() {
// code section (see IHI0056B). $d identifies data contents.
// Compilers usually merge multiple data objects in a single $d-$x interval,
// but we need every data object to be marked with $d. Because of that we
// create a vector of MarkerSyms with all locations of data objects.
// keep track of marker symbols with all locations of data objects.
struct MarkerSym {
uint64_t Address;
MarkerSymType Type;
};
std::vector<MarkerSym> SortedMarkerSymbols;
DenseMap<uint64_t, MarkerSymType> MarkerSymbols;
auto addExtraDataMarkerPerSymbol = [&]() {
bool IsData = false;
uint64_t LastAddr = 0;
@ -912,14 +907,14 @@ void RewriteInstance::discoverFileObjects() {
}
if (MarkerType != MarkerSymType::NONE) {
SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
MarkerSymbols[SymInfo.Address] = MarkerType;
LastAddr = SymInfo.Address;
IsData = MarkerType == MarkerSymType::DATA;
continue;
}
if (IsData) {
SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA});
MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA;
LastAddr = SymInfo.Address;
}
}
@ -1284,27 +1279,24 @@ void RewriteInstance::discoverFileObjects() {
BC->setHasSymbolsWithFileName(FileSymbols.size());
// Now that all the functions were created - adjust their boundaries.
adjustFunctionBoundaries();
adjustFunctionBoundaries(MarkerSymbols);
// Annotate functions with code/data markers in AArch64
for (auto ISym = SortedMarkerSymbols.begin();
ISym != SortedMarkerSymbols.end(); ++ISym) {
auto *BF =
BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
for (auto &[Address, Type] : MarkerSymbols) {
auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true);
if (!BF) {
// Stray marker
continue;
}
const auto EntryOffset = ISym->Address - BF->getAddress();
if (ISym->Type == MarkerSymType::CODE) {
const auto EntryOffset = Address - BF->getAddress();
if (Type == MarkerSymType::CODE) {
BF->markCodeAtOffset(EntryOffset);
continue;
}
if (ISym->Type == MarkerSymType::DATA) {
if (Type == MarkerSymType::DATA) {
BF->markDataAtOffset(EntryOffset);
BC->AddressToConstantIslandMap[ISym->Address] = BF;
BC->AddressToConstantIslandMap[Address] = BF;
continue;
}
llvm_unreachable("Unknown marker");
@ -1833,7 +1825,8 @@ void RewriteInstance::disassemblePLT() {
}
}
void RewriteInstance::adjustFunctionBoundaries() {
void RewriteInstance::adjustFunctionBoundaries(
DenseMap<uint64_t, MarkerSymType> &MarkerSyms) {
for (auto BFI = BC->getBinaryFunctions().begin(),
BFE = BC->getBinaryFunctions().end();
BFI != BFE; ++BFI) {
@ -1871,12 +1864,15 @@ void RewriteInstance::adjustFunctionBoundaries() {
continue;
}
// This is potentially another entry point into the function.
uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
<< Function << " at offset 0x"
<< Twine::utohexstr(EntryOffset) << '\n');
Function.addEntryPointAtOffset(EntryOffset);
auto It = MarkerSyms.find(NextSymRefI->first);
if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) {
// This is potentially another entry point into the function.
uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
<< Function << " at offset 0x"
<< Twine::utohexstr(EntryOffset) << '\n');
Function.addEntryPointAtOffset(EntryOffset);
}
++NextSymRefI;
}

View File

@ -0,0 +1,38 @@
# This test is to ensure that we query data marker symbols to avoid
# misidentifying constant data island symbol as extra entry point.
# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar
# RUN: llvm-bolt %t.so -o %t.instr.so
.text
.global _start
.type _start, %function
_start:
ret
.text
.global _foo
.type _foo, %function
_foo:
cbz x1, _foo_2
_foo_1:
add x1, x2, x0
b _foo
_foo_2:
ret
# None of these constant island symbols should be identified as extra entry
# point for function `_foo'.
.align 4
_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80
_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0
_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0
.text
.global _bar
.type _bar, %function
_bar:
ret
# Dummy relocation to force relocation mode
.reloc 0, R_AARCH64_NONE