From 5c4f506cca6b6684adf69ac118028cb8b186d2e8 Mon Sep 17 00:00:00 2001 From: YongKang Zhu Date: Wed, 20 Aug 2025 14:18:56 -0700 Subject: [PATCH] [BOLT] Validate extra entry point by querying data marker symbols (#154611) Look up marker symbols and decide whether candidate is really extra entry point in `adjustFunctionBoundaries()`. --- bolt/include/bolt/Core/BinaryFunction.h | 5 -- bolt/include/bolt/Rewrite/RewriteInstance.h | 2 +- bolt/lib/Core/BinaryFunction.cpp | 8 +--- bolt/lib/Rewrite/RewriteInstance.cpp | 48 +++++++++---------- ...data-marker-invalidates-extra-entrypoint.s | 38 +++++++++++++++ 5 files changed, 63 insertions(+), 38 deletions(-) create mode 100644 bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index ae580520b911..b59926cc7557 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1196,11 +1196,6 @@ public: return getSecondaryEntryPointSymbol(BB.getLabel()); } - /// Remove a label from the secondary entry point map. - void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) { - SecondaryEntryPoints.erase(Label); - } - /// Return true if the basic block is an entry point into the function /// (either primary or secondary). bool isEntryPoint(const BinaryBasicBlock &BB) const { diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 91d62a78de39..19dcce8205eb 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -241,7 +241,7 @@ private: /// Adjust function sizes and set proper maximum size values after the whole /// symbol table has been processed. - void adjustFunctionBoundaries(); + void adjustFunctionBoundaries(DenseMap &MarkerSyms); /// Make .eh_frame section relocatable. void relocateEHFrameSection(); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index eec68ff5a5fc..8f494f105fbb 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() { continue; // If we have grabbed a wrong code label which actually points to some - // constant island inside the function, ignore this label and remove it - // from the secondary entry point map. - if (isStartOfConstantIsland(Offset)) { - BC.SymbolToFunctionMap.erase(Label); - removeSymbolFromSecondaryEntryPointMap(Label); + // constant island inside the function, ignore this label. + if (isStartOfConstantIsland(Offset)) continue; - } BC.errs() << "BOLT-WARNING: reference in the middle of instruction " "detected in function " diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index b2056ba2380f..79daa38142ae 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -881,14 +881,9 @@ void RewriteInstance::discoverFileObjects() { // code section (see IHI0056B). $d identifies data contents. // Compilers usually merge multiple data objects in a single $d-$x interval, // but we need every data object to be marked with $d. Because of that we - // create a vector of MarkerSyms with all locations of data objects. + // keep track of marker symbols with all locations of data objects. - struct MarkerSym { - uint64_t Address; - MarkerSymType Type; - }; - - std::vector SortedMarkerSymbols; + DenseMap MarkerSymbols; auto addExtraDataMarkerPerSymbol = [&]() { bool IsData = false; uint64_t LastAddr = 0; @@ -912,14 +907,14 @@ void RewriteInstance::discoverFileObjects() { } if (MarkerType != MarkerSymType::NONE) { - SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType}); + MarkerSymbols[SymInfo.Address] = MarkerType; LastAddr = SymInfo.Address; IsData = MarkerType == MarkerSymType::DATA; continue; } if (IsData) { - SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA}); + MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA; LastAddr = SymInfo.Address; } } @@ -1284,27 +1279,24 @@ void RewriteInstance::discoverFileObjects() { BC->setHasSymbolsWithFileName(FileSymbols.size()); // Now that all the functions were created - adjust their boundaries. - adjustFunctionBoundaries(); + adjustFunctionBoundaries(MarkerSymbols); // Annotate functions with code/data markers in AArch64 - for (auto ISym = SortedMarkerSymbols.begin(); - ISym != SortedMarkerSymbols.end(); ++ISym) { - - auto *BF = - BC->getBinaryFunctionContainingAddress(ISym->Address, true, true); + for (auto &[Address, Type] : MarkerSymbols) { + auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true); if (!BF) { // Stray marker continue; } - const auto EntryOffset = ISym->Address - BF->getAddress(); - if (ISym->Type == MarkerSymType::CODE) { + const auto EntryOffset = Address - BF->getAddress(); + if (Type == MarkerSymType::CODE) { BF->markCodeAtOffset(EntryOffset); continue; } - if (ISym->Type == MarkerSymType::DATA) { + if (Type == MarkerSymType::DATA) { BF->markDataAtOffset(EntryOffset); - BC->AddressToConstantIslandMap[ISym->Address] = BF; + BC->AddressToConstantIslandMap[Address] = BF; continue; } llvm_unreachable("Unknown marker"); @@ -1833,7 +1825,8 @@ void RewriteInstance::disassemblePLT() { } } -void RewriteInstance::adjustFunctionBoundaries() { +void RewriteInstance::adjustFunctionBoundaries( + DenseMap &MarkerSyms) { for (auto BFI = BC->getBinaryFunctions().begin(), BFE = BC->getBinaryFunctions().end(); BFI != BFE; ++BFI) { @@ -1871,12 +1864,15 @@ void RewriteInstance::adjustFunctionBoundaries() { continue; } - // This is potentially another entry point into the function. - uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " - << Function << " at offset 0x" - << Twine::utohexstr(EntryOffset) << '\n'); - Function.addEntryPointAtOffset(EntryOffset); + auto It = MarkerSyms.find(NextSymRefI->first); + if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) { + // This is potentially another entry point into the function. + uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " + << Function << " at offset 0x" + << Twine::utohexstr(EntryOffset) << '\n'); + Function.addEntryPointAtOffset(EntryOffset); + } ++NextSymRefI; } diff --git a/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s new file mode 100644 index 000000000000..3bcbcbba8a38 --- /dev/null +++ b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s @@ -0,0 +1,38 @@ +# This test is to ensure that we query data marker symbols to avoid +# misidentifying constant data island symbol as extra entry point. + +# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar +# RUN: llvm-bolt %t.so -o %t.instr.so + + .text + .global _start + .type _start, %function +_start: + ret + + .text + .global _foo + .type _foo, %function +_foo: + cbz x1, _foo_2 +_foo_1: + add x1, x2, x0 + b _foo +_foo_2: + ret + +# None of these constant island symbols should be identified as extra entry +# point for function `_foo'. + .align 4 +_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80 +_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0 +_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0 + + .text + .global _bar + .type _bar, %function +_bar: + ret + + # Dummy relocation to force relocation mode + .reloc 0, R_AARCH64_NONE