[BOLT] Store FileSymRefs in a multimap

With aggressive ICF, different local symbols (under different FILE
symbols) can be mapped to the same address.

FileSymRefs only keeps a single SymbolRef per address, which prevents
fragment matching from finding the correct symbol to perform parent
function lookup.

Work around this issue by switching FileSymRefs to a multimap. In the
future, uses of FileSymRefs can be replaced with SortedSymbols, which
keeps essentially the same information.

Test Plan: added ambiguous_fragment.test

Reviewers: dcci, ayermolo, maksfb, rafaelauler

Reviewed By: rafaelauler

Pull Request: https://github.com/llvm/llvm-project/pull/98992
This commit is contained in:
Amir Ayupov 2024-07-16 22:14:43 -07:00 committed by GitHub
parent f0ac8903ea
commit 3fe50b6dde
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 104 additions and 4 deletions

View File

@ -490,7 +490,7 @@ private:
std::unordered_map<const MCSymbol *, uint32_t> SymbolIndex;
/// Store all non-zero symbols in this map for a quick address lookup.
std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
std::multimap<uint64_t, llvm::object::SymbolRef> FileSymRefs;
/// FILE symbols used for disambiguating split function parents.
std::vector<ELFSymbolRef> FileSymbols;

View File

@ -886,7 +886,7 @@ void RewriteInstance::discoverFileObjects() {
if (SymName == "__hot_start" || SymName == "__hot_end")
continue;
FileSymRefs[SymbolAddress] = Symbol;
FileSymRefs.emplace(SymbolAddress, Symbol);
// Skip section symbols that will be registered by disassemblePLT().
if (SymbolType == SymbolRef::ST_Debug) {
@ -1052,7 +1052,9 @@ void RewriteInstance::discoverFileObjects() {
// Remove the symbol from FileSymRefs so that we can skip it in the
// future.
auto SI = FileSymRefs.find(SymbolAddress);
auto SI = llvm::find_if(
llvm::make_range(FileSymRefs.equal_range(SymbolAddress)),
[&](auto SymIt) { return SymIt.second == Symbol; });
assert(SI != FileSymRefs.end() && "symbol expected to be present");
assert(SI->second == Symbol && "wrong symbol found");
FileSymRefs.erase(SI);
@ -1260,6 +1262,7 @@ void RewriteInstance::discoverFileObjects() {
registerFragments();
FileSymbols.clear();
FileSymRefs.clear();
discoverBOLTReserved();
}
@ -1433,7 +1436,11 @@ void RewriteInstance::registerFragments() {
const uint64_t Address = BF->getAddress();
// Get fragment's own symbol
const auto SymIt = FileSymRefs.find(Address);
const auto SymIt = llvm::find_if(
llvm::make_range(FileSymRefs.equal_range(Address)), [&](auto SI) {
StringRef Name = cantFail(SI.second.getName());
return Name.contains(ParentName);
});
if (SymIt == FileSymRefs.end()) {
BC->errs()
<< "BOLT-ERROR: symbol lookup failed for function at address 0x"

View File

@ -0,0 +1,54 @@
## Split-file input with five parts. Parts file1..file4 are identical apart
## from the .file name: each defines __func.cold.0 in .text.cold and __func
## in .text, both consisting of a single ud2. No .globl directive is used,
## so every part carries its own same-named pair of symbols.
#--- file1
.file "file1.cpp"
.section .text.cold
.type __func.cold.0, @function
__func.cold.0:
ud2
.size __func.cold.0, .-__func.cold.0
.section .text
.type __func, @function
__func:
ud2
.size __func, .-__func
#--- file2
.file "file2.cpp"
.section .text.cold
.type __func.cold.0, @function
__func.cold.0:
ud2
.size __func.cold.0, .-__func.cold.0
.section .text
.type __func, @function
__func:
ud2
.size __func, .-__func
#--- file3
.file "file3.cpp"
.section .text.cold
.type __func.cold.0, @function
__func.cold.0:
ud2
.size __func.cold.0, .-__func.cold.0
.section .text
.type __func, @function
__func:
ud2
.size __func, .-__func
#--- file4
.file "file4.cpp"
.section .text.cold
.type __func.cold.0, @function
__func.cold.0:
ud2
.size __func.cold.0, .-__func.cold.0
.section .text
.type __func, @function
__func:
ud2
.size __func, .-__func
#--- file5
## This part only emits a .file directive naming "bolt-pseudo.o"; it defines
## no symbols or sections of its own.
.file "bolt-pseudo.o"

View File

@ -0,0 +1,6 @@
/* Lay out the two output sections at fixed addresses: the location counter
   is set to 0x10000 before .text (collecting every input .text) and to
   0x20000 before .text.cold (collecting every input .text.cold). */
SECTIONS {
. = 0x10000;
.text : { *(.text) }
. = 0x20000;
.text.cold : { *(.text.cold) }
}

View File

@ -0,0 +1,33 @@
## This reproduces a bug with misidentification of a parent fragment.
##
## Split the assembly input into its parts, assemble each part into its own
## object file, and link all five objects with the accompanying linker script.
RUN: split-file %p/Inputs/ambiguous_fragment.s %t
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file1 -o %t1.o
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file2 -o %t2.o
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file3 -o %t3.o
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file4 -o %t4.o
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file5 -o %t5.o
RUN: ld.lld %t1.o %t2.o %t3.o %t4.o %t5.o -o %t.exe \
RUN: --script %p/Inputs/ambiguous_fragment.script
## Add two extra function symbols to the linked binary: a local _Zfunc.cold.0
## at offset 0x4 of .text.cold and _Zfunc at offset 0xc of .text. The
## symbol-table expectations below require _Zfunc.cold.0 to land on the same
## address (0x20004) as an existing __func.cold.0.
RUN: llvm-objcopy %t.exe %t.exe2 \
RUN: --add-symbol=_Zfunc.cold.0=.text.cold:0x4,local,function \
RUN: --add-symbol=_Zfunc=.text:0xc,function
RUN: llvm-objdump --syms %t.exe2 | FileCheck %s --check-prefix=CHECK-SYMS
## Produce %t.preagg with link_fdata and pass it to perf2bolt (-p ... --pa);
## the output of perf2bolt at -v=1 is what the remaining expectations inspect.
RUN: link_fdata %s %t.exe2 %t.preagg PREAGG
RUN: perf2bolt -v=1 %t.exe2 -p %t.preagg --pa -o %t.fdata -w %t.yaml | FileCheck %s
# PREAGG: B X:0 #__func# 1 0
CHECK-SYMS: 0000000000020004 {{.*}} __func.cold.0
CHECK-SYMS: 0000000000020004 {{.*}} _Zfunc.cold.0
## The parent-lookup error must not be reported before the fragment messages,
## and the five "marking ... as a fragment of ..." messages must appear on
## consecutive lines in exactly this order.
CHECK-NOT: BOLT-ERROR: parent function not found for __func.cold.0
CHECK: BOLT-INFO: marking __func.cold.0/3(*4) as a fragment of __func/4(*3)
CHECK-NEXT: BOLT-INFO: marking __func.cold.0/1(*2) as a fragment of __func/1(*2)
CHECK-NEXT: BOLT-INFO: marking __func.cold.0/2(*2) as a fragment of __func/2(*2)
CHECK-NEXT: BOLT-INFO: marking __func.cold.0/3(*4) as a fragment of __func/3(*2)
CHECK-NEXT: BOLT-INFO: marking __func.cold.0/4(*2) as a fragment of __func/4(*3)