[BOLT] Validate extra entry point by querying data marker symbols (#154611)
Look up marker symbols and decide whether candidate is really extra entry point in `adjustFunctionBoundaries()`.
This commit is contained in:
parent
5683baea6d
commit
5c4f506cca
@ -1196,11 +1196,6 @@ public:
|
||||
return getSecondaryEntryPointSymbol(BB.getLabel());
|
||||
}
|
||||
|
||||
/// Remove a label from the secondary entry point map.
|
||||
void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
|
||||
SecondaryEntryPoints.erase(Label);
|
||||
}
|
||||
|
||||
/// Return true if the basic block is an entry point into the function
|
||||
/// (either primary or secondary).
|
||||
bool isEntryPoint(const BinaryBasicBlock &BB) const {
|
||||
|
@ -241,7 +241,7 @@ private:
|
||||
|
||||
/// Adjust function sizes and set proper maximum size values after the whole
|
||||
/// symbol table has been processed.
|
||||
void adjustFunctionBoundaries();
|
||||
void adjustFunctionBoundaries(DenseMap<uint64_t, MarkerSymType> &MarkerSyms);
|
||||
|
||||
/// Make .eh_frame section relocatable.
|
||||
void relocateEHFrameSection();
|
||||
|
@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() {
|
||||
continue;
|
||||
|
||||
// If we have grabbed a wrong code label which actually points to some
|
||||
// constant island inside the function, ignore this label and remove it
|
||||
// from the secondary entry point map.
|
||||
if (isStartOfConstantIsland(Offset)) {
|
||||
BC.SymbolToFunctionMap.erase(Label);
|
||||
removeSymbolFromSecondaryEntryPointMap(Label);
|
||||
// constant island inside the function, ignore this label.
|
||||
if (isStartOfConstantIsland(Offset))
|
||||
continue;
|
||||
}
|
||||
|
||||
BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
|
||||
"detected in function "
|
||||
|
@ -881,14 +881,9 @@ void RewriteInstance::discoverFileObjects() {
|
||||
// code section (see IHI0056B). $d identifies data contents.
|
||||
// Compilers usually merge multiple data objects in a single $d-$x interval,
|
||||
// but we need every data object to be marked with $d. Because of that we
|
||||
// create a vector of MarkerSyms with all locations of data objects.
|
||||
// keep track of marker symbols with all locations of data objects.
|
||||
|
||||
struct MarkerSym {
|
||||
uint64_t Address;
|
||||
MarkerSymType Type;
|
||||
};
|
||||
|
||||
std::vector<MarkerSym> SortedMarkerSymbols;
|
||||
DenseMap<uint64_t, MarkerSymType> MarkerSymbols;
|
||||
auto addExtraDataMarkerPerSymbol = [&]() {
|
||||
bool IsData = false;
|
||||
uint64_t LastAddr = 0;
|
||||
@ -912,14 +907,14 @@ void RewriteInstance::discoverFileObjects() {
|
||||
}
|
||||
|
||||
if (MarkerType != MarkerSymType::NONE) {
|
||||
SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
|
||||
MarkerSymbols[SymInfo.Address] = MarkerType;
|
||||
LastAddr = SymInfo.Address;
|
||||
IsData = MarkerType == MarkerSymType::DATA;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (IsData) {
|
||||
SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA});
|
||||
MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA;
|
||||
LastAddr = SymInfo.Address;
|
||||
}
|
||||
}
|
||||
@ -1284,27 +1279,24 @@ void RewriteInstance::discoverFileObjects() {
|
||||
BC->setHasSymbolsWithFileName(FileSymbols.size());
|
||||
|
||||
// Now that all the functions were created - adjust their boundaries.
|
||||
adjustFunctionBoundaries();
|
||||
adjustFunctionBoundaries(MarkerSymbols);
|
||||
|
||||
// Annotate functions with code/data markers in AArch64
|
||||
for (auto ISym = SortedMarkerSymbols.begin();
|
||||
ISym != SortedMarkerSymbols.end(); ++ISym) {
|
||||
|
||||
auto *BF =
|
||||
BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
|
||||
for (auto &[Address, Type] : MarkerSymbols) {
|
||||
auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true);
|
||||
|
||||
if (!BF) {
|
||||
// Stray marker
|
||||
continue;
|
||||
}
|
||||
const auto EntryOffset = ISym->Address - BF->getAddress();
|
||||
if (ISym->Type == MarkerSymType::CODE) {
|
||||
const auto EntryOffset = Address - BF->getAddress();
|
||||
if (Type == MarkerSymType::CODE) {
|
||||
BF->markCodeAtOffset(EntryOffset);
|
||||
continue;
|
||||
}
|
||||
if (ISym->Type == MarkerSymType::DATA) {
|
||||
if (Type == MarkerSymType::DATA) {
|
||||
BF->markDataAtOffset(EntryOffset);
|
||||
BC->AddressToConstantIslandMap[ISym->Address] = BF;
|
||||
BC->AddressToConstantIslandMap[Address] = BF;
|
||||
continue;
|
||||
}
|
||||
llvm_unreachable("Unknown marker");
|
||||
@ -1833,7 +1825,8 @@ void RewriteInstance::disassemblePLT() {
|
||||
}
|
||||
}
|
||||
|
||||
void RewriteInstance::adjustFunctionBoundaries() {
|
||||
void RewriteInstance::adjustFunctionBoundaries(
|
||||
DenseMap<uint64_t, MarkerSymType> &MarkerSyms) {
|
||||
for (auto BFI = BC->getBinaryFunctions().begin(),
|
||||
BFE = BC->getBinaryFunctions().end();
|
||||
BFI != BFE; ++BFI) {
|
||||
@ -1871,12 +1864,15 @@ void RewriteInstance::adjustFunctionBoundaries() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// This is potentially another entry point into the function.
|
||||
uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
|
||||
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
|
||||
<< Function << " at offset 0x"
|
||||
<< Twine::utohexstr(EntryOffset) << '\n');
|
||||
Function.addEntryPointAtOffset(EntryOffset);
|
||||
auto It = MarkerSyms.find(NextSymRefI->first);
|
||||
if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) {
|
||||
// This is potentially another entry point into the function.
|
||||
uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
|
||||
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
|
||||
<< Function << " at offset 0x"
|
||||
<< Twine::utohexstr(EntryOffset) << '\n');
|
||||
Function.addEntryPointAtOffset(EntryOffset);
|
||||
}
|
||||
|
||||
++NextSymRefI;
|
||||
}
|
||||
|
38
bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
Normal file
38
bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
Normal file
@ -0,0 +1,38 @@
|
||||
# This test is to ensure that we query data marker symbols to avoid
|
||||
# misidentifying constant data island symbol as extra entry point.
|
||||
|
||||
# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar
|
||||
# RUN: llvm-bolt %t.so -o %t.instr.so
|
||||
|
||||
.text
|
||||
.global _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
ret
|
||||
|
||||
.text
|
||||
.global _foo
|
||||
.type _foo, %function
|
||||
_foo:
|
||||
cbz x1, _foo_2
|
||||
_foo_1:
|
||||
add x1, x2, x0
|
||||
b _foo
|
||||
_foo_2:
|
||||
ret
|
||||
|
||||
# None of these constant island symbols should be identified as extra entry
|
||||
# point for function `_foo'.
|
||||
.align 4
|
||||
_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80
|
||||
_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0
|
||||
_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0
|
||||
|
||||
.text
|
||||
.global _bar
|
||||
.type _bar, %function
|
||||
_bar:
|
||||
ret
|
||||
|
||||
# Dummy relocation to force relocation mode
|
||||
.reloc 0, R_AARCH64_NONE
|
Loading…
x
Reference in New Issue
Block a user