[DWARFLinker] Fix matching logic to remove type 1 missing offsets (#149618)

Second attempt to fix
https://discourse.llvm.org/t/rfc-new-dwarf-attribute-for-symbolication-of-merged-functions/79434/29?u=alx32

(First attempt: https://github.com/llvm/llvm-project/pull/143656)

Context: the sequence offset to row index we parsed may not be complete.
And we need to add manual matching to it.

https://github.com/llvm/llvm-project/pull/143656 attempts to do trivial
1:1 matching, however, sometimes they don't line up perfectly, as shown
below:
 
 ```
// While SeqOffToOrigRow parsed from CU could be the ground truth,
          // e.g.
          //
          // SeqOff     Row
          // 0x08        9
          // 0x14       15
          //
// The StmtAttrs and SeqStartRows may not match perfectly, e.g.
          //
          // StmtAttrs  SeqStartRows
          // 0x04        3
          // 0x08        5
          // 0x10        9
          // 0x12       11
          // 0x14       15
          //
// In this case, we don't want to assign 5 to 0x08, since we know 0x08
// maps to 9. If we do a dummy 1:1 mapping 0x10 will be mapped to 9
// which is incorrect. The expected behavior is ignore 5, realign the
          // table based on the result from the line table:
          //
          // StmtAttrs  SeqStartRows
          // 0x04        3
          //   --        5
          // 0x08        9 <- LineTableMapping ground truth
          // 0x10       11
          // 0x12       --
          // 0x14       15 <- LineTableMapping ground truth
 ```
 
In this case, we need to use the mapping we read from the line table as
a ground truth and organize them properly to prevent duplicated
offset/missing offset.
 
 Test:
 
 Updated the test case

---------

Signed-off-by: Peter Rong <PeterRong@meta.com>
This commit is contained in:
Peter Rong 2025-07-30 16:08:02 -07:00 committed by GitHub
parent e6f360b0ab
commit ed940d7228
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 626 additions and 352 deletions

View File

@ -413,6 +413,116 @@ static bool isTlsAddressCode(uint8_t DW_OP_Code) {
DW_OP_Code == dwarf::DW_OP_GNU_push_tls_address;
}
static void constructSeqOffsettoOrigRowMapping(
CompileUnit &Unit, const DWARFDebugLine::LineTable &LT,
DenseMap<size_t, unsigned> &SeqOffToOrigRow) {
// Use std::map for ordered iteration.
std::map<uint64_t, unsigned> LineTableMapping;
// First, trust the sequences that the DWARF parser did identify.
for (const DWARFDebugLine::Sequence &Seq : LT.Sequences)
LineTableMapping[Seq.StmtSeqOffset] = Seq.FirstRowIndex;
// Second, manually find sequence boundaries and match them to the
// sorted attributes to handle sequences the parser might have missed.
auto StmtAttrs = Unit.getStmtSeqListAttributes();
llvm::sort(StmtAttrs, [](const PatchLocation &A, const PatchLocation &B) {
return A.get() < B.get();
});
std::vector<size_t> SeqStartRows;
SeqStartRows.push_back(0);
for (auto [I, Row] : llvm::enumerate(ArrayRef(LT.Rows).drop_back()))
if (Row.EndSequence)
SeqStartRows.push_back(I + 1);
// While SeqOffToOrigRow parsed from CU could be the ground truth,
// e.g.
//
// SeqOff Row
// 0x08 9
// 0x14 15
//
// The StmtAttrs and SeqStartRows may not match perfectly, e.g.
//
// StmtAttrs SeqStartRows
// 0x04 3
// 0x08 5
// 0x10 9
// 0x12 11
// 0x14 15
//
// In this case, we don't want to assign 5 to 0x08, since we know 0x08
// maps to 9. If we do a dummy 1:1 mapping 0x10 will be mapped to 9
// which is incorrect. The expected behavior is ignore 5, realign the
// table based on the result from the line table:
//
// StmtAttrs SeqStartRows
// 0x04 3
// -- 5
// 0x08 9 <- LineTableMapping ground truth
// 0x10 11
// 0x12 --
// 0x14 15 <- LineTableMapping ground truth
ArrayRef StmtAttrsRef(StmtAttrs);
ArrayRef SeqStartRowsRef(SeqStartRows);
// Dummy last element to make sure StmtAttrsRef and SeqStartRowsRef always
// run out first.
constexpr size_t DummyKey = UINT64_MAX;
constexpr unsigned DummyVal = UINT32_MAX;
LineTableMapping[DummyKey] = DummyVal;
for (auto [NextSeqOff, NextRow] : LineTableMapping) {
auto StmtAttrSmallerThanNext = [NextSeqOff](const PatchLocation &SA) {
return SA.get() < NextSeqOff;
};
auto SeqStartSmallerThanNext = [NextRow](const size_t &Row) {
return Row < NextRow;
};
// If both StmtAttrs and SeqStartRows points to value not in
// the LineTableMapping yet, we do a dummy one to one mapping and
// move the pointer.
while (!StmtAttrsRef.empty() && !SeqStartRowsRef.empty() &&
StmtAttrSmallerThanNext(StmtAttrsRef.front()) &&
SeqStartSmallerThanNext(SeqStartRowsRef.front())) {
SeqOffToOrigRow[StmtAttrsRef.consume_front().get()] =
SeqStartRowsRef.consume_front();
}
// One of the pointer points to the value at or past Next in the
// LineTableMapping, We move the pointer to re-align with the
// LineTableMapping
StmtAttrsRef = StmtAttrsRef.drop_while(StmtAttrSmallerThanNext);
SeqStartRowsRef = SeqStartRowsRef.drop_while(SeqStartSmallerThanNext);
// Use the LineTableMapping's result as the ground truth and move
// on.
if (NextSeqOff != DummyKey) {
SeqOffToOrigRow[NextSeqOff] = NextRow;
}
// Move the pointers if they are pointed at Next.
// It is possible that they point to later entries in LineTableMapping.
// Therefore we only increment the pointers after we validate they are
// pointing to the `Next` entry. e.g.
//
// LineTableMapping
// SeqOff Row
// 0x08 9 <- NextSeqOff/NextRow
// 0x14 15
//
// StmtAttrs SeqStartRows
// 0x14 13 <- StmtAttrsRef.front() / SeqStartRowsRef.front()
// 0x16 15
// -- 17
if (!StmtAttrsRef.empty() && StmtAttrsRef.front().get() == NextSeqOff)
StmtAttrsRef.consume_front();
if (!SeqStartRowsRef.empty() && SeqStartRowsRef.front() == NextRow)
SeqStartRowsRef.consume_front();
}
}
std::pair<bool, std::optional<int64_t>>
DWARFLinker::getVariableRelocAdjustment(AddressesMap &RelocMgr,
const DWARFDie &DIE) {
@ -2297,8 +2407,12 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
// Create a map of stmt sequence offsets to original row indices.
DenseMap<uint64_t, unsigned> SeqOffToOrigRow;
for (const DWARFDebugLine::Sequence &Seq : LT->Sequences)
SeqOffToOrigRow[Seq.StmtSeqOffset] = Seq.FirstRowIndex;
// The DWARF parser's discovery of sequences can be incomplete. To
// ensure all DW_AT_LLVM_stmt_sequence attributes can be patched, we
// build a map from both the parser's results and a manual
// reconstruction.
if (!LT->Rows.empty())
constructSeqOffsettoOrigRowMapping(Unit, *LT, SeqOffToOrigRow);
// Create a map of original row indices to new row indices.
DenseMap<size_t, size_t> OrigRowToNewRow;

File diff suppressed because it is too large Load Diff