[BOLT] Retain certain local symbols (#184074)
BOLT currently strips all STT_NOTYPE STB_LOCAL zero-sized symbols that fall inside function bodies. Some of these symbols are named labels (loop markers and subroutine entry points) or local function symbols in hand-written assembly. We now keep them in the local symbol table of BOLT-processed binaries for better symbolication.
This commit is contained in:
parent
17e783b241
commit
95685ca52e
@ -887,9 +887,11 @@ public:
|
||||
|
||||
bool isRISCV() const { return TheTriple->getArch() == llvm::Triple::riscv64; }
|
||||
|
||||
// AArch64-specific functions to check if symbol is used to delimit
|
||||
// AArch64/RISC-V functions to check if symbol is used to delimit
|
||||
// code/data in .text. Code is marked by $x, data by $d.
|
||||
MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
|
||||
MarkerSymType getMarkerType(unsigned SymbolType, uint64_t SymbolSize,
|
||||
StringRef SymbolName) const;
|
||||
bool isMarker(const SymbolRef &Symbol) const;
|
||||
|
||||
/// Iterate over all BinaryData.
|
||||
|
||||
@ -2039,35 +2039,40 @@ void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
|
||||
}
|
||||
}
|
||||
|
||||
MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
|
||||
MarkerSymType BinaryContext::getMarkerType(unsigned SymbolType,
|
||||
uint64_t SymbolSize,
|
||||
StringRef SymbolName) const {
|
||||
// For aarch64 and riscv, the ABI defines mapping symbols so we identify data
|
||||
// in the code section (see IHI0056B). $x identifies a symbol starting code or
|
||||
// the end of a data chunk inside code, $d identifies start of data.
|
||||
if (isX86() || ELFSymbolRef(Symbol).getSize())
|
||||
if (isX86() || SymbolSize)
|
||||
return MarkerSymType::NONE;
|
||||
|
||||
Expected<StringRef> NameOrError = Symbol.getName();
|
||||
Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
|
||||
|
||||
if (!TypeOrError || !NameOrError)
|
||||
if (SymbolType != ELF::STT_NOTYPE)
|
||||
return MarkerSymType::NONE;
|
||||
|
||||
if (*TypeOrError != SymbolRef::ST_Unknown)
|
||||
return MarkerSymType::NONE;
|
||||
|
||||
if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
|
||||
if (SymbolName == "$x" || SymbolName.starts_with("$x."))
|
||||
return MarkerSymType::CODE;
|
||||
|
||||
// $x<ISA>
|
||||
if (isRISCV() && NameOrError->starts_with("$x"))
|
||||
if (isRISCV() && SymbolName.starts_with("$x"))
|
||||
return MarkerSymType::CODE;
|
||||
|
||||
if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
|
||||
if (SymbolName == "$d" || SymbolName.starts_with("$d."))
|
||||
return MarkerSymType::DATA;
|
||||
|
||||
return MarkerSymType::NONE;
|
||||
}
|
||||
|
||||
/// Classify \p Symbol as a code/data mapping symbol.
///
/// Thin adapter over the (type, size, name) overload: it reads the raw ELF
/// symbol type, the symbol size and the symbol name from \p Symbol and
/// forwards them.
///
/// \returns MarkerSymType::NONE when the symbol name cannot be read;
/// otherwise the result of the (type, size, name) overload.
MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
  Expected<StringRef> NameOrError = Symbol.getName();
  if (!NameOrError) {
    // An Expected<> holding an error must have that error handled before it
    // is destroyed; otherwise builds with LLVM_ENABLE_ABI_BREAKING_CHECKS
    // abort. A symbol without a readable name is deliberately treated as
    // "not a marker", so the error is dropped on purpose.
    consumeError(NameOrError.takeError());
    return MarkerSymType::NONE;
  }

  const ELFSymbolRef ELFSymbol(Symbol);
  return getMarkerType(ELFSymbol.getELFType(), ELFSymbol.getSize(),
                       *NameOrError);
}
|
||||
|
||||
bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
|
||||
return getMarkerType(Symbol) != MarkerSymType::NONE;
|
||||
}
|
||||
|
||||
@ -5352,13 +5352,27 @@ void RewriteInstance::updateELFSymbolTable(
|
||||
} else {
|
||||
// Check if the function symbol matches address inside a function, i.e.
|
||||
// it marks a secondary entry point.
|
||||
// Also look up local NOTYPE symbols inside functions so we can
|
||||
// update their addresses to reflect the output layout.
|
||||
// Skip AArch64/RISC-V marker symbols ($d, $x) inside functions —
|
||||
// BOLT generates its own via addExtraSymbols.
|
||||
auto IsMarkerSymbol = [&]() {
|
||||
return BC->getMarkerType(Symbol.getType(), Symbol.st_size,
|
||||
*SymbolName) != MarkerSymType::NONE;
|
||||
};
|
||||
const bool IsLocalLabel = Symbol.getType() == ELF::STT_NOTYPE &&
|
||||
Symbol.getBinding() == ELF::STB_LOCAL &&
|
||||
Symbol.st_size == 0 && !IsMarkerSymbol();
|
||||
Function =
|
||||
(Symbol.getType() == ELF::STT_FUNC)
|
||||
(Symbol.getType() == ELF::STT_FUNC || IsLocalLabel)
|
||||
? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
|
||||
/*CheckPastEnd=*/false,
|
||||
/*UseMaxSize=*/true)
|
||||
: nullptr;
|
||||
|
||||
assert((!Function || !IsLocalLabel || !Function->isFolded()) &&
|
||||
"Local label inside ICF-folded function");
|
||||
|
||||
if (Function && Function->isEmitted()) {
|
||||
assert(Function->getLayout().isHotColdSplit() &&
|
||||
"Adding symbols based on cold fragment when there are more than "
|
||||
@ -5366,6 +5380,12 @@ void RewriteInstance::updateELFSymbolTable(
|
||||
const uint64_t OutputAddress =
|
||||
Function->translateInputToOutputAddress(Symbol.st_value);
|
||||
|
||||
// Remove symbols that cannot be mapped to the output, e.g.
|
||||
// data-in-code labels (jump tables) whose addresses BOLT
|
||||
// does not track.
|
||||
if (!OutputAddress)
|
||||
continue;
|
||||
|
||||
NewSymbol.st_value = OutputAddress;
|
||||
// Force secondary entry points to have zero size.
|
||||
NewSymbol.st_size = 0;
|
||||
@ -5414,19 +5434,14 @@ void RewriteInstance::updateELFSymbolTable(
|
||||
NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
|
||||
}
|
||||
|
||||
// Detect local syms in the text section that we didn't update
|
||||
// and that were preserved by the linker to support relocations against
|
||||
// .text. Remove them from the symtab.
|
||||
if (Symbol.getType() == ELF::STT_NOTYPE &&
|
||||
Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) {
|
||||
if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
|
||||
/*CheckPastEnd=*/false,
|
||||
/*UseMaxSize=*/true)) {
|
||||
// Can only delete the symbol if not patching. Such symbols should
|
||||
// not exist in the dynamic symbol table.
|
||||
assert(!IsDynSym && "cannot delete symbol");
|
||||
continue;
|
||||
}
|
||||
// Drop AArch64/RISC-V marker symbols ($d, $x) inside functions —
|
||||
// BOLT generates its own via addExtraSymbols.
|
||||
if (IsMarkerSymbol() &&
|
||||
BC->getBinaryFunctionContainingAddress(Symbol.st_value,
|
||||
/*CheckPastEnd=*/false,
|
||||
/*UseMaxSize=*/true)) {
|
||||
assert(!IsDynSym && "cannot delete symbol");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -30,8 +30,10 @@ immediate_increment:
|
||||
|
||||
# CHECK: <immediate_increment>:
|
||||
# CHECK-NEXT: {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
|
||||
# CHECK: <.exit0>:
|
||||
# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
# CHECK: <.cold0>:
|
||||
# CHECK-NEXT: [[ADDR0]]: {{.*}} mov x0, #0x1 // =1
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
|
||||
@ -52,8 +54,10 @@ immediate_decrement:
|
||||
|
||||
# CHECK: <immediate_decrement>:
|
||||
# CHECK-NEXT: {{.*}} cbhi x0, #0x0, 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
|
||||
# CHECK: <.exit1>:
|
||||
# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
# CHECK: <.cold1>:
|
||||
# CHECK-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
|
||||
@ -74,8 +78,10 @@ register_swap:
|
||||
|
||||
# CHECK: <register_swap>:
|
||||
# CHECK-NEXT: {{.*}} cbgt x1, x0, 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
|
||||
# CHECK: <.exit2>:
|
||||
# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
# CHECK: <.cold2>:
|
||||
# CHECK-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
|
||||
@ -99,8 +105,10 @@ irreversible:
|
||||
# CHECK: <irreversible>:
|
||||
# CHECK-NEXT: {{.*}} cbgt x0, #0x3f, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
|
||||
# CHECK-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
|
||||
# CHECK: <.exit3>:
|
||||
# CHECK-NEXT: [[ADDR3]]: {{.*}} mov x0, #0x2 // =2
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
# CHECK: <.cold3>:
|
||||
# CHECK-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x1 // =1
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
|
||||
|
||||
@ -41,8 +41,10 @@ reorder_blocks:
|
||||
|
||||
# CHECK: <reorder_blocks>:
|
||||
# CHECK-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR:[0-9a-f]+]] <{{.*}}>
|
||||
# CHECK: <.hot_exit>:
|
||||
# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
# CHECK: <.cold_exit>:
|
||||
# CHECK-NEXT: [[ADDR]]: {{.*}} mov x0, #0x1 // =1
|
||||
# CHECK-NEXT: {{.*}} ret
|
||||
|
||||
|
||||
30
bolt/test/AArch64/retain-local-symbols.s
Normal file
30
bolt/test/AArch64/retain-local-symbols.s
Normal file
@ -0,0 +1,30 @@
|
||||
## Check that BOLT retains named local symbols (assembly labels) inside
|
||||
## functions and updates their addresses to reflect the output layout.
|
||||
|
||||
# RUN: %clang %cflags %s -o %t.exe -Wl,-q
|
||||
# RUN: llvm-bolt %t.exe -o %t.bolt -lite=false
|
||||
# RUN: llvm-nm -n %t.bolt | FileCheck %s
|
||||
|
||||
# CHECK: T _start
|
||||
# CHECK: t loop_start
|
||||
# CHECK: t loop_end
|
||||
# CHECK: t helper
|
||||
|
||||
.text
|
||||
.global _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
mov x0, #10
|
||||
bl helper
|
||||
loop_start:
|
||||
sub x0, x0, #1
|
||||
cbnz x0, loop_start
|
||||
loop_end:
|
||||
ret
|
||||
|
||||
helper:
|
||||
add x0, x0, #1
|
||||
ret
|
||||
|
||||
## Force relocation mode.
|
||||
.reloc 0, R_AARCH64_NONE
|
||||
@ -8,7 +8,7 @@ RUN: llvm-objdump -d --disassemble-symbols=use_avx512 %t | \
|
||||
RUN: FileCheck %s --check-prefix=CHECK-DIS-NO-TRAP
|
||||
|
||||
RUN: llvm-bolt %t --trap-avx512=1 -o %t.bolt --lite=0 2>&1 | FileCheck %s
|
||||
RUN: llvm-objdump -d --disassemble-symbols=use_avx512 %t.bolt | \
|
||||
RUN: llvm-objdump -d --disassemble-symbols=use_avx512,secondary_entry %t.bolt | \
|
||||
RUN: FileCheck %s --check-prefix=CHECK-DIS
|
||||
|
||||
RUN: llvm-bolt %t --trap-avx512=0 -o %t.bolt --lite=0
|
||||
@ -20,6 +20,7 @@ CHECK: BOLT-WARNING: 1 function will trap on entry
|
||||
## Check that we have two ud2 instructions - one per entry.
|
||||
CHECK-DIS: use_avx512
|
||||
CHECK-DIS-NEXT: ud2
|
||||
CHECK-DIS: <secondary_entry>:
|
||||
CHECK-DIS-NEXT: ud2
|
||||
|
||||
## Check that we generate correct AVX-512
|
||||
|
||||
@ -5,13 +5,14 @@
|
||||
# RUN: %clang %cflags -fPIC -pie %s -o %t.exe -nostdlib -Wl,-q
|
||||
# RUN: llvm-bolt %t.exe -o %t.bolt > %t.out.txt
|
||||
# RUN: llvm-readelf -r %t.bolt >> %t.out.txt
|
||||
# RUN: llvm-objdump --disassemble-symbols=chain %t.bolt >> %t.out.txt
|
||||
# RUN: llvm-objdump --disassemble-symbols=chain,Label %t.bolt >> %t.out.txt
|
||||
# RUN: FileCheck %s --input-file=%t.out.txt
|
||||
|
||||
## Check if the new address in `chain` is correctly updated by BOLT
|
||||
# CHECK: Relocation section '.rela.dyn' at offset 0x{{.*}} contains 1 entries:
|
||||
# CHECK: {{.*}} R_X86_64_RELATIVE [[#%x,ADDR:]]
|
||||
# CHECK: [[#ADDR]]: c3 retq
|
||||
# CHECK: <Label>:
|
||||
# CHECK-NEXT: [[#ADDR]]: c3 retq
|
||||
.text
|
||||
.type chain, @function
|
||||
chain:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user