[lldb] Fix source line annotations for libsanitizers traces (#154247)

When providing allocation and deallocation traces,
the ASan compiler-rt runtime already provides call
addresses (`HistoryPCType::Calls`).

On Darwin, system sanitizers (libsanitizers)
provide return addresses.  They also discard a few
non-user frames at the top of the stack, because
these internal libmalloc/libsanitizers stack
frames do not provide any value when diagnosing
memory errors.

Introduce and add handling for
`HistoryPCType::ReturnsNoZerothFrame` to cover this
case and enable line-level testing of
libsanitizers traces.

rdar://157596927

---
Commit 1 is a mechanical refactoring to introduce
and adopt the `HistoryPCType` enum to replace the
`pcs_are_call_addresses` bool.  It preserves the
current behavior:
```
pcs_are_call_addresses:
  false  ->  HistoryPCType::Returns (default)
  true   ->  HistoryPCType::Calls
``` 

Best reviewed commit by commit.
This commit is contained in:
Julian Lettner 2025-08-20 14:33:27 -07:00 committed by GitHub
parent 7d33743324
commit 484d0408f9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 83 additions and 57 deletions

View File

@ -248,6 +248,22 @@ enum class IterationAction {
Stop,
};
/// Specifies the type of PCs when creating a `HistoryThread`.
/// - `Returns` - Usually, when LLDB unwinds the stack or we retrieve a stack
/// trace via `backtrace()` we are collecting return addresses (except for the
/// topmost frame which is the actual PC). LLDB then maps these return
/// addresses back to call addresses to give accurate source line annotations.
/// - `ReturnsNoZerothFrame` - Some trace providers (e.g., libsanitizers traces)
/// collect return addresses but prune the topmost frames, so we should skip
/// the special treatment of frame 0.
/// - `Calls` - Other trace providers (e.g., ASan compiler-rt runtime) already
/// perform this mapping, so we need to prevent LLDB from doing it again.
///
/// `HistoryUnwind` turns this value into the per-frame
/// `behaves_like_zeroth_frame` flag: `Returns` sets it only for frame 0,
/// `ReturnsNoZerothFrame` sets it for no frame, and `Calls` sets it for every
/// frame.
enum class HistoryPCType {
Returns, ///< PCs are return addresses, except for the topmost frame (actual PC).
ReturnsNoZerothFrame, ///< All PCs are return addresses, including frame 0.
Calls ///< PCs are call addresses; no return-to-call mapping is needed.
};
inline std::string GetStatDescription(lldb_private::StatisticKind K) {
switch (K) {
case StatisticKind::ExpressionSuccessful:

View File

@ -266,9 +266,9 @@ InstrumentationRuntimeMainThreadChecker::GetBacktracesFromExtendedStopInfo(
// We gather symbolication addresses above, so no need for HistoryThread to
// try to infer the call addresses.
bool pcs_are_call_addresses = true;
ThreadSP new_thread_sp = std::make_shared<HistoryThread>(
*process_sp, tid, PCs, pcs_are_call_addresses);
auto pc_type = HistoryPCType::Calls;
ThreadSP new_thread_sp =
std::make_shared<HistoryThread>(*process_sp, tid, PCs, pc_type);
// Save this in the Process' ExtendedThreadList so a strong pointer retains
// the object

View File

@ -324,9 +324,9 @@ InstrumentationRuntimeUBSan::GetBacktracesFromExtendedStopInfo(
// We gather symbolication addresses above, so no need for HistoryThread to
// try to infer the call addresses.
bool pcs_are_call_addresses = true;
ThreadSP new_thread_sp = std::make_shared<HistoryThread>(
*process_sp, tid, PCs, pcs_are_call_addresses);
auto pc_type = HistoryPCType::Calls;
ThreadSP new_thread_sp =
std::make_shared<HistoryThread>(*process_sp, tid, PCs, pc_type);
std::string stop_reason_description = GetStopReasonDescription(info);
new_thread_sp->SetName(stop_reason_description.c_str());

View File

@ -83,7 +83,7 @@ ReportRetriever::RetrieveReportData(const ProcessSP process_sp) {
options.SetAutoApplyFixIts(false);
options.SetLanguage(eLanguageTypeObjC_plus_plus);
if (auto m = GetPreferredAsanModule(process_sp->GetTarget())) {
if (auto [m, _] = GetPreferredAsanModule(process_sp->GetTarget()); m) {
SymbolContextList sc_list;
sc_list.Append(SymbolContext(std::move(m)));
options.SetPreferredSymbolContexts(std::move(sc_list));

View File

@ -13,10 +13,12 @@
namespace lldb_private {
lldb::ModuleSP GetPreferredAsanModule(const Target &target) {
// Currently only supported on Darwin.
std::tuple<lldb::ModuleSP, HistoryPCType>
GetPreferredAsanModule(const Target &target) {
// Currently only Darwin provides ASan runtime support as part of the OS
// (libsanitizers).
if (!target.GetArchitecture().GetTriple().isOSDarwin())
return nullptr;
return {nullptr, HistoryPCType::Calls};
lldb::ModuleSP module;
llvm::Regex pattern(R"(libclang_rt\.asan_.*_dynamic\.dylib)");
@ -29,7 +31,16 @@ lldb::ModuleSP GetPreferredAsanModule(const Target &target) {
return IterationAction::Continue;
});
return module;
// `Calls` - The ASan compiler-rt runtime already massages the return
// addresses into call addresses, so we don't want LLDB's unwinder to try to
// locate the previous instruction again as this might lead to us reporting
// a different line.
// `ReturnsNoZerothFrame` - Darwin, but not ASan compiler-rt implies
// libsanitizers which collects return addresses. It also discards a few
// non-user frames at the top of the stack.
auto pc_type =
(module ? HistoryPCType::Calls : HistoryPCType::ReturnsNoZerothFrame);
return {module, pc_type};
}
} // namespace lldb_private

View File

@ -9,18 +9,20 @@
#ifndef LLDB_SOURCE_PLUGINS_INSTRUMENTATIONRUNTIME_UTILITY_UTILITY_H
#define LLDB_SOURCE_PLUGINS_INSTRUMENTATIONRUNTIME_UTILITY_UTILITY_H
#include <tuple>
#include "lldb/lldb-forward.h"
#include "lldb/lldb-private-enumerations.h"
namespace lldb_private {
class Target;
/// On Darwin, if LLDB loaded libclang_rt, it's coming from a locally built
/// compiler-rt, and we should prefer it in favour of the system sanitizers
/// when running InstrumentationRuntime utility expressions that use symbols
/// from the sanitizer libraries. This helper searches the target for such a
/// dylib. Returns the module (nullptr if no such dylib was found) together
/// with the kind of PCs contained in the traces of the selected runtime.
lldb::ModuleSP GetPreferredAsanModule(const Target &target);
std::tuple<lldb::ModuleSP, HistoryPCType>
GetPreferredAsanModule(const Target &target);
} // namespace lldb_private

View File

@ -91,11 +91,9 @@ const char *memory_history_asan_command_format =
t;
)";
static void CreateHistoryThreadFromValueObject(ProcessSP process_sp,
ValueObjectSP return_value_sp,
const char *type,
const char *thread_name,
HistoryThreads &result) {
static void CreateHistoryThreadFromValueObject(
ProcessSP process_sp, ValueObjectSP return_value_sp, HistoryPCType pc_type,
const char *type, const char *thread_name, HistoryThreads &result) {
std::string count_path = "." + std::string(type) + "_count";
std::string tid_path = "." + std::string(type) + "_tid";
std::string trace_path = "." + std::string(type) + "_trace";
@ -128,12 +126,8 @@ static void CreateHistoryThreadFromValueObject(ProcessSP process_sp,
pcs.push_back(pc);
}
// The ASAN runtime already massages the return addresses into call
// addresses, we don't want LLDB's unwinder to try to locate the previous
// instruction again as this might lead to us reporting a different line.
bool pcs_are_call_addresses = true;
HistoryThread *history_thread =
new HistoryThread(*process_sp, tid, pcs, pcs_are_call_addresses);
new HistoryThread(*process_sp, tid, pcs, pc_type);
ThreadSP new_thread_sp(history_thread);
std::ostringstream thread_name_with_number;
thread_name_with_number << thread_name << " Thread " << tid;
@ -176,7 +170,8 @@ HistoryThreads MemoryHistoryASan::GetHistoryThreads(lldb::addr_t address) {
options.SetAutoApplyFixIts(false);
options.SetLanguage(eLanguageTypeObjC_plus_plus);
if (auto m = GetPreferredAsanModule(process_sp->GetTarget())) {
auto [m, pc_type] = GetPreferredAsanModule(process_sp->GetTarget());
if (m) {
SymbolContextList sc_list;
sc_list.Append(SymbolContext(std::move(m)));
options.SetPreferredSymbolContexts(std::move(sc_list));
@ -197,10 +192,10 @@ HistoryThreads MemoryHistoryASan::GetHistoryThreads(lldb::addr_t address) {
if (!return_value_sp)
return result;
CreateHistoryThreadFromValueObject(process_sp, return_value_sp, "free",
"Memory deallocated by", result);
CreateHistoryThreadFromValueObject(process_sp, return_value_sp, "alloc",
"Memory allocated by", result);
CreateHistoryThreadFromValueObject(process_sp, return_value_sp, pc_type,
"free", "Memory deallocated by", result);
CreateHistoryThreadFromValueObject(process_sp, return_value_sp, pc_type,
"alloc", "Memory allocated by", result);
return result;
}

View File

@ -27,13 +27,12 @@ using namespace lldb_private;
HistoryThread::HistoryThread(lldb_private::Process &process, lldb::tid_t tid,
std::vector<lldb::addr_t> pcs,
bool pcs_are_call_addresses)
HistoryPCType pc_type)
: Thread(process, tid, true), m_framelist_mutex(), m_framelist(),
m_pcs(pcs), m_extended_unwind_token(LLDB_INVALID_ADDRESS), m_queue_name(),
m_thread_name(), m_originating_unique_thread_id(tid),
m_queue_id(LLDB_INVALID_QUEUE_ID) {
m_unwinder_up =
std::make_unique<HistoryUnwind>(*this, pcs, pcs_are_call_addresses);
m_unwinder_up = std::make_unique<HistoryUnwind>(*this, pcs, pc_type);
Log *log = GetLog(LLDBLog::Object);
LLDB_LOGF(log, "%p HistoryThread::HistoryThread", static_cast<void *>(this));
}

View File

@ -27,14 +27,14 @@ namespace lldb_private {
/// process execution
///
/// This subclass of Thread is used to provide a backtrace from earlier in
/// process execution. It is given a backtrace list of pc addresses and it
/// will create stack frames for them.
/// process execution. It is given a backtrace list of pcs (return or call
/// addresses) and it will create stack frames for them.
class HistoryThread : public lldb_private::Thread {
public:
HistoryThread(lldb_private::Process &process, lldb::tid_t tid,
std::vector<lldb::addr_t> pcs,
bool pcs_are_call_addresses = false);
HistoryPCType pc_type = HistoryPCType::Returns);
~HistoryThread() override;

View File

@ -24,9 +24,8 @@ using namespace lldb_private;
// Constructor
HistoryUnwind::HistoryUnwind(Thread &thread, std::vector<lldb::addr_t> pcs,
bool pcs_are_call_addresses)
: Unwind(thread), m_pcs(pcs),
m_pcs_are_call_addresses(pcs_are_call_addresses) {}
HistoryPCType pc_type)
: Unwind(thread), m_pcs(pcs), m_pc_type(pc_type) {}
// Destructor
@ -52,6 +51,17 @@ HistoryUnwind::DoCreateRegisterContextForFrame(StackFrame *frame) {
return rctx;
}
/// Decides whether the frame at \p frame_idx must be treated like frame 0,
/// i.e. whether its PC is used as-is instead of being mapped from a return
/// address back to the call address.
///
/// \param pc_type   Kind of PCs stored in the history trace.
/// \param frame_idx Index of the frame being queried (0 is the topmost frame).
/// \return true if the PC of this frame must not be adjusted.
static bool BehavesLikeZerothFrame(HistoryPCType pc_type, uint32_t frame_idx) {
  // Deliberately no `default:` label so that -Wswitch flags any enumerator
  // added later without a matching case.
  switch (pc_type) {
  case HistoryPCType::Returns:
    // Only the topmost frame holds an actual PC.
    return (frame_idx == 0);
  case HistoryPCType::ReturnsNoZerothFrame:
    // The topmost frames were pruned: every PC is a return address.
    return false;
  case HistoryPCType::Calls:
    // PCs are already call addresses: never adjust them.
    return true;
  }
  // Not reachable for valid enumerators. Returning here gives the function a
  // defined result on every path (no flowing off the end of a non-void
  // function, no -Wreturn-type warning); fall back to the treatment of the
  // default `HistoryPCType::Returns`.
  return (frame_idx == 0);
}
bool HistoryUnwind::DoGetFrameInfoAtIndex(uint32_t frame_idx, lldb::addr_t &cfa,
lldb::addr_t &pc,
bool &behaves_like_zeroth_frame) {
@ -61,10 +71,7 @@ bool HistoryUnwind::DoGetFrameInfoAtIndex(uint32_t frame_idx, lldb::addr_t &cfa,
if (frame_idx < m_pcs.size()) {
cfa = frame_idx;
pc = m_pcs[frame_idx];
if (m_pcs_are_call_addresses)
behaves_like_zeroth_frame = true;
else
behaves_like_zeroth_frame = (frame_idx == 0);
behaves_like_zeroth_frame = BehavesLikeZerothFrame(m_pc_type, frame_idx);
return true;
}
return false;

View File

@ -19,7 +19,7 @@ namespace lldb_private {
class HistoryUnwind : public lldb_private::Unwind {
public:
HistoryUnwind(Thread &thread, std::vector<lldb::addr_t> pcs,
bool pcs_are_call_addresses = false);
HistoryPCType pc_type = HistoryPCType::Returns);
~HistoryUnwind() override;
@ -36,9 +36,7 @@ protected:
private:
std::vector<lldb::addr_t> m_pcs;
/// This boolean indicates that the PCs in the non-0 frames are call
/// addresses and not return addresses.
bool m_pcs_are_call_addresses;
HistoryPCType m_pc_type;
};
} // namespace lldb_private

View File

@ -544,9 +544,9 @@ ThreadSP SystemRuntimeMacOSX::GetExtendedBacktraceThread(ThreadSP real_thread,
if (!thread_extended_info->ForEach(extract_frame_pc))
return {};
originating_thread_sp =
std::make_shared<HistoryThread>(*m_process, real_thread->GetIndexID(),
app_specific_backtrace_pcs, true);
originating_thread_sp = std::make_shared<HistoryThread>(
*m_process, real_thread->GetIndexID(), app_specific_backtrace_pcs,
HistoryPCType::Calls);
originating_thread_sp->SetQueueName(type.AsCString());
}
return originating_thread_sp;

View File

@ -41,18 +41,16 @@ class MemoryHistoryTestCase(TestBase):
self.line_free = line_number("main.c", "// free line")
self.line_breakpoint = line_number("main.c", "// break line")
# Test line numbers: rdar://126237493
# for libsanitizers and remove `skip_line_numbers` parameter
def check_traces(self, skip_line_numbers=False):
def check_traces(self):
self.expect(
"memory history 'pointer'",
substrs=[
"Memory deallocated by Thread",
"a.out`f2",
"main.c" if skip_line_numbers else f"main.c:{self.line_free}",
f"main.c:{self.line_free}",
"Memory allocated by Thread",
"a.out`f1",
"main.c" if skip_line_numbers else f"main.c:{self.line_malloc}",
f"main.c:{self.line_malloc}",
],
)
@ -76,7 +74,7 @@ class MemoryHistoryTestCase(TestBase):
self.runCmd("env SanitizersAllocationTraces=all")
self.run_to_breakpoint(target)
self.check_traces(skip_line_numbers=True)
self.check_traces()
def libsanitizers_asan_tests(self):
target = self.createTestTarget()
@ -84,7 +82,7 @@ class MemoryHistoryTestCase(TestBase):
self.runCmd("env SanitizersAddress=1 MallocSanitizerZone=1")
self.run_to_breakpoint(target)
self.check_traces(skip_line_numbers=True)
self.check_traces()
self.runCmd("continue")
@ -94,7 +92,7 @@ class MemoryHistoryTestCase(TestBase):
"Process should be stopped due to ASan report",
substrs=["stopped", "stop reason = Use of deallocated memory"],
)
self.check_traces(skip_line_numbers=True)
self.check_traces()
# do the same using SB API
process = self.dbg.GetSelectedTarget().process