[LLDB][NativePDB] Estimate symbol sizes (#165727)

In #165604, a test was skipped on Windows, because the native PDB plugin
didn't set sizes on symbols. While the test isn't compiled with debug
info, it's linked with `-gdwarf`, causing a PDB to be created on
Windows. This PDB will only contain the public symbols (written by the
linker) and section information. The symbols themselves don't have a
size; however, the DIA SDK sets a size for them.
It seems like, for these data symbols, the size given from DIA is the
distance to the next symbol (or the section end).

This PR implements the naive approach for the native plugin. The main
difference is in function/code symbols. There, DIA searches for a
corresponding `S_GPROC32`, which has a "code size" that is sometimes
slightly smaller than the difference to the next symbol.
This commit is contained in:
nerix 2025-10-31 10:33:37 +01:00 committed by GitHub
parent f2443861d7
commit 44fefe70e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 68 additions and 34 deletions

View File

@ -1130,7 +1130,35 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
if (!section_list)
return;
for (auto pid : m_index->publics().getPublicsTable()) {
PublicSym32 last_sym;
size_t last_sym_idx = 0;
lldb::SectionSP section_sp;
// To estimate the size of a symbol, we use the distance to the next symbol.
// If there's no next symbol or the section/segment changed, the symbol will
// take the remaining space of its section. The estimate can be too high in
// case there's padding between symbols. This is similar to the algorithm used
// by the DIA SDK.
//
// `next` is the public symbol that follows the most recently added one, or
// nullptr when there is no following symbol. The lambda reads the captured
// `last_sym`, `last_sym_idx` and `section_sp` and sets the byte size on the
// symbol stored at `last_sym_idx` in `symtab`.
auto finish_last_symbol = [&](const PublicSym32 *next) {
// Without a current section there is no previous symbol to finish.
if (!section_sp)
return;
Symbol *last = symtab.SymbolAtIndex(last_sym_idx);
if (!last)
return;
if (next && last_sym.Segment == next->Segment) {
// Same segment: the size is the gap between the two offsets. The assert
// expects symbols to be visited in ascending offset order.
assert(last_sym.Offset <= next->Offset);
last->SetByteSize(next->Offset - last_sym.Offset);
} else {
// The last symbol was the last in its section, so it extends to the end of
// the section. If there is a next symbol, it is expected to be in a later
// segment.
assert(section_sp->GetByteSize() >= last_sym.Offset);
assert(!next || next->Segment > last_sym.Segment);
last->SetByteSize(section_sp->GetByteSize() - last_sym.Offset);
}
};
// The address map is sorted by the address of a symbol.
for (auto pid : m_index->publics().getAddressMap()) {
PdbGlobalSymId global{pid, true};
CVSymbol sym = m_index->ReadSymbolRecord(global);
auto kind = sym.kind();
@ -1138,8 +1166,11 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
continue;
PublicSym32 pub =
llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym));
finish_last_symbol(&pub);
if (!section_sp || last_sym.Segment != pub.Segment)
section_sp = section_list->FindSectionByID(pub.Segment);
auto section_sp = section_list->FindSectionByID(pub.Segment);
if (!section_sp)
continue;
@ -1148,20 +1179,24 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
(pub.Flags & PublicSymFlags::Code) != PublicSymFlags::None)
type = eSymbolTypeCode;
symtab.AddSymbol(Symbol(/*symID=*/pid,
/*name=*/pub.Name,
/*type=*/type,
/*external=*/true,
/*is_debug=*/true,
/*is_trampoline=*/false,
/*is_artificial=*/false,
/*section_sp=*/section_sp,
/*value=*/pub.Offset,
/*size=*/0,
/*size_is_valid=*/false,
/*contains_linker_annotations=*/false,
/*flags=*/0));
last_sym_idx =
symtab.AddSymbol(Symbol(/*symID=*/pid,
/*name=*/pub.Name,
/*type=*/type,
/*external=*/true,
/*is_debug=*/true,
/*is_trampoline=*/false,
/*is_artificial=*/false,
/*section_sp=*/section_sp,
/*value=*/pub.Offset,
/*size=*/0,
/*size_is_valid=*/false,
/*contains_linker_annotations=*/false,
/*flags=*/0));
last_sym = pub;
}
finish_last_symbol(nullptr);
}
size_t SymbolFileNativePDB::ParseFunctions(CompileUnit &comp_unit) {

View File

@ -12,10 +12,6 @@ from lldbsuite.test import lldbutil
class MultipleSlidesTestCase(TestBase):
NO_DEBUG_INFO_TESTCASE = True
# The intermediate object main.o is compiled without debug info, but
# a.out is linked with `-gdwarf` on Windows. This creates a PDB.
# However, in the native PDB plugin, the symbols don't have a size.
@expectedFailureWindows
def test_mulitple_slides(self):
"""Test that a binary can be slid multiple times correctly."""
self.build()
@ -33,10 +29,13 @@ class MultipleSlidesTestCase(TestBase):
first_sym.GetEndAddress().GetOffset()
- first_sym.GetStartAddress().GetOffset()
)
int_size = target.FindFirstType("int").GetByteSize()
self.assertGreaterEqual(first_size, 2048 * int_size)
second_size = (
second_sym.GetEndAddress().GetOffset()
- second_sym.GetStartAddress().GetOffset()
)
self.assertGreaterEqual(second_size, 2048 * int_size)
# View the first element of `first` and `second` while
# they have no load address set.

View File

@ -42,18 +42,18 @@ int main(int argc, char **argv) {
return ns::a_function() + b.b_func();
}
// CHECK-DAG: Code {{.*}} main
// CHECK-DAG: Code {{.*}} ?b_func@?$B@F@ns@@QEBAHXZ
// CHECK-DAG: Code {{.*}} ?something@A@@QEAAXXZ
// CHECK-DAG: Code {{.*}} ??_GDyn@ns@@UEAAPEAXI@Z
// CHECK-DAG: Code {{.*}} ??2@YAPEAX_K@Z
// CHECK-DAG: Code {{.*}} ??3@YAXPEAX_K@Z
// CHECK-DAG: Code {{.*}} ?static_fn@C@?$B@H@ns@@SAHXZ
// CHECK-DAG: Code {{.*}} ?a_function@ns@@YAHXZ
// CHECK-DAG: Code {{.*}} ?static_fn@C@?$B@_N@ns@@SAHXZ
// CHECK-DAG: Code {{.*}} ??1Dyn@ns@@UEAA@XZ
// CHECK-DAG: Code {{.*}} ??0Dyn@ns@@QEAA@XZ
// CHECK-DAG: Data {{.*}} ?global_int@@3HA
// CHECK-DAG: Data {{.*}} ??_7Dyn@ns@@6B@
// CHECK-DAG: Data {{.*}} ?global_a@@3UA@@A
// CHECK-DAG: Data {{.*}} ?global_c@@3UC@?$B@_J@ns@@A
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 main
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?b_func@?$B@F@ns@@QEBAHXZ
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?something@A@@QEAAXXZ
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??_GDyn@ns@@UEAAPEAXI@Z
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??2@YAPEAX_K@Z
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??3@YAXPEAX_K@Z
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?static_fn@C@?$B@H@ns@@SAHXZ
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?a_function@ns@@YAHXZ
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?static_fn@C@?$B@_N@ns@@SAHXZ
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??1Dyn@ns@@UEAA@XZ
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??0Dyn@ns@@QEAA@XZ
// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_int@@3HA
// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??_7Dyn@ns@@6B@
// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_a@@3UA@@A
// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_c@@3UC@?$B@_J@ns@@A