[BOLT] Check if symbol is in data area of function (#160143)

`getEntryIDForSymbol` can be called with a symbol that lies in a constant
island (a data area inside a function), so BOLT cannot match it to any entry
point of that function. BOLT then reaches `llvm_unreachable("symbol not
found")` and crashes. This patch avoids the crash by making
`getEntryIDForSymbol` return `std::optional<uint64_t>`, with `std::nullopt`
when the symbol is not found.
This commit is contained in:
Asher Dobrescu 2026-03-06 10:37:54 +00:00 committed by GitHub
parent a28699bdcc
commit 7bce678ec1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 51 additions and 8 deletions

View File

@ -683,7 +683,8 @@ private:
///
/// Prefer to use BinaryContext::getFunctionForSymbol(EntrySymbol, &ID)
/// instead of calling this function directly.
uint64_t getEntryIDForSymbol(const MCSymbol *EntrySymbol) const;
std::optional<uint64_t>
getEntryIDForSymbol(const MCSymbol *EntrySymbol) const;
/// If the function represents a secondary split function fragment, set its
/// parent fragment to \p BF.

View File

@ -2523,8 +2523,10 @@ BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
return nullptr;
BinaryFunction *BF = BFI->second;
if (EntryDesc)
*EntryDesc = BF->getEntryIDForSymbol(Symbol);
if (EntryDesc) {
std::optional<uint64_t> EntryID = BF->getEntryIDForSymbol(Symbol);
*EntryDesc = EntryID.value_or(0);
}
return BF;
}

View File

@ -1908,7 +1908,7 @@ bool BinaryFunction::scanExternalRefs() {
}
bool BinaryFunction::validateInternalBranches() {
if (!isSimple() || TrapsOnEntry)
if (!hasInstructions() || !isSimple() || TrapsOnEntry)
return true;
for (const auto &KV : Labels) {
@ -3885,8 +3885,9 @@ MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
return nullptr;
}
uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
if (!isMultiEntry())
std::optional<uint64_t>
BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
if (!isMultiEntry() || !Symbol)
return 0;
for (const MCSymbol *FunctionSymbol : getSymbols())
@ -3912,8 +3913,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
return NumEntries;
++NumEntries;
}
llvm_unreachable("symbol not found");
return std::nullopt;
}
bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {

View File

@ -0,0 +1,40 @@
// This test checks that, when looking for the function corresponding to a
// symbol, BOLT does not look through a data area (constant island).
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
// Before the check for constant islands was added, BOLT would fail with
// "symbol not found" and abort with an LLVM UNREACHABLE error.
# CHECK-NOT: symbol not found
# CHECK-NOT: UNREACHABLE
// Now BOLT emits a warning and does not crash.
# CHECK: BOLT-WARNING: corrupted control flow detected in function main{{.*}}:
# CHECK-SAME: an external branch/call targets an invalid instruction in
# CHECK-SAME: function foo{{.*}} at address {{.*}}; ignoring both functions
# CHECK: BOLT-WARNING: ignoring entry point at address 0x{{[0-9a-f]+}} in constant island of function foo{{.*}}
// main calls first_block, a label that is placed inside foo's data area.
.text
.global main
.type main, %function
main:
add x0, x1, x1
bl first_block
ret
.type foo, %function
foo:
nop
// The $d mapping symbol marks what follows as data, so first_block lands in
// a constant island of foo.
$d:
first_block:
add x0, x1, x1
bl second_block
ret
// The $x mapping symbol marks what follows as A64 code again.
$x:
second_block:
add x0, x1, x1
ret