[lldb] [disassembler] chore: add GetVariableAnnotations to SBInstruction api (#177676)

## Description
This is a contribution to the [Rich Disassembler for
LLDB](https://discourse.llvm.org/t/rich-disassembler-for-lldb/76952) topic,
specifically this part:
```
The rich disassembler output should be exposed as structured data and made available through LLDB’s scripting API so more tooling could be built on top of this
```

----

This PR replaces #174847.

As suggested in [this
comment](https://github.com/llvm/llvm-project/pull/174847#issuecomment-3757015552),
this change implements access to variable annotations from the `SBInstruction` class
itself.

Notes:
-   Ran the black formatter on the Python file.

## Testing
Run the test with:
```sh
./build/bin/lldb-dotest -v -p TestVariableAnnotationsDisassembler.py lldb/test/API/functionalities/disassembler-variables
```

All tests (9 existing + 1 newly added) pass.

<details>
<summary>screenshot 2026-01-23</summary>

Built from the latest commit 08f00730b5768a8e3f7039d810084fabaaa24470.

<img width="1506" height="562" alt="image"
src="https://github.com/user-attachments/assets/69516353-3432-47df-ae45-c40b51ec14c4"
/>

</details>

<details>
<summary>screenshot 2026-01-29</summary>

Built from the latest commit f48a1a2c10f96a457ca6844be2ccc9406d3d57a0.

<img width="1232" height="740" alt="image"
src="https://github.com/user-attachments/assets/9d104ce6-36c3-430b-98fe-f028f83a6b6d"
/>


</details>

---------

Signed-off-by: Nikita B <n2h9z4@gmail.com>
This commit is contained in:
n2h9 2026-01-29 20:54:42 +01:00 committed by GitHub
parent 8122d0e4bc
commit a617b901cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 218 additions and 1 deletions

View File

@ -22,6 +22,42 @@ STRING_EXTENSION_OUTSIDE(SBInstruction)
# Returns the result of self.GetComment(target).
# NOTE(review): `target` is neither a parameter nor a local here, so the lookup
# falls through to module scope — confirm a module-level `target` exists,
# otherwise this raises NameError when called.
# NOTE(review): the name suggests a load-address accessor ("adrr" looks like a
# typo for "addr"), yet the body fetches the comment — confirm intent.
def __load_adrr_property__ (self):
return self.GetComment (target)
def variable_annotations(self):
    """Get variable annotations as a Python list of dictionaries.

    Converts the structured data returned by ``GetVariableAnnotations()``
    into plain Python objects. Array entries that are not dictionaries are
    skipped, and a key is copied only when its value is valid, so any
    individual key may be absent from a result dictionary.

    Returns:
        List of dictionaries, each containing variable annotation data.
        Integer keys: ``start_address``, ``end_address``,
        ``register_kind``, ``decl_line``. String keys: ``variable_name``,
        ``location_description``, ``decl_file``, ``type_name``.
        An empty list is returned when the structured data is not valid.
    """
    # Keys to copy, grouped by the accessor used to read them. Defined once
    # up front because they do not change between annotations.
    integer_fields = ("start_address", "end_address", "register_kind", "decl_line")
    string_fields = ("variable_name", "location_description", "decl_file", "type_name")
    # Size passed to GetStringValue().
    # NOTE(review): strings longer than this are presumably truncated — confirm.
    max_string_length = 1024

    structured_data = self.GetVariableAnnotations()
    if not structured_data.IsValid():
        return []

    annotations = []
    for i in range(structured_data.GetSize()):
        item = structured_data.GetItemAtIndex(i)
        if item.GetType() != eStructuredDataTypeDictionary:
            continue

        annotation = {}
        for field in integer_fields:
            value = item.GetValueForKey(field)
            if value.IsValid():
                annotation[field] = value.GetUnsignedIntegerValue()
        for field in string_fields:
            value = item.GetValueForKey(field)
            if value.IsValid():
                annotation[field] = value.GetStringValue(max_string_length)
        annotations.append(annotation)

    return annotations
mnemonic = property(__mnemonic_property__, None, doc='''A read only property that returns the mnemonic for this instruction as a string.''')
operands = property(__operands_property__, None, doc='''A read only property that returns the operands for this instruction as a string.''')
comment = property(__comment_property__, None, doc='''A read only property that returns the comment for this instruction as a string.''')

View File

@ -73,6 +73,22 @@ public:
bool TestEmulation(lldb::SBStream &output_stream, const char *test_file);
/// Get variable annotations for this instruction as structured data.
///
/// Returns an array of dictionaries, one per annotation. If this instruction
/// is not valid, a default-constructed SBStructuredData is returned instead.
///
/// Keys present in every dictionary:
/// - "variable_name": string name of the variable
/// - "location_description": string description of where variable is stored
/// ("RDI", "R15", "undef", etc.)
/// - "register_kind": unsigned integer indicating the register numbering
/// scheme
///
/// Keys present only when the corresponding information is available:
/// - "start_address": unsigned integer file address where this annotation
/// becomes valid
/// - "end_address": unsigned integer file address where this annotation
/// becomes invalid (the start address plus the byte size of the range)
/// - "decl_file": string path to the file where variable is declared
/// - "decl_line": unsigned integer line number where variable is declared
/// - "type_name": string type name of the variable
lldb::SBStructuredData GetVariableAnnotations();
protected:
friend class SBInstructionList;

View File

@ -156,6 +156,7 @@ protected:
friend class lldb_private::python::SWIGBridge;
friend class lldb_private::lua::SWIGBridge;
friend class SBCommandInterpreter;
friend class SBInstruction;
SBStructuredData(const lldb_private::StructuredDataImpl &impl);

View File

@ -20,6 +20,7 @@
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/FileSpec.h"
#include "lldb/Utility/StructuredData.h"
#include "lldb/lldb-defines.h"
#include "lldb/lldb-forward.h"
#include "lldb/lldb-private-enumerations.h"
@ -233,6 +234,10 @@ public:
static const char *GetNameForInstructionControlFlowKind(
lldb::InstructionControlFlowKind instruction_control_flow_kind);
/// Get variable annotations for this instruction as structured data.
///
/// This is the implementation behind SBInstruction::GetVariableAnnotations();
/// see that method's documentation for the dictionary keys.
///
/// \return
///     An array with one dictionary per variable annotation. The array is
///     always allocated; it is empty when there are no annotations.
StructuredData::ArraySP GetVariableAnnotations();
protected:
Address m_address; // The section offset address of this instruction
// We include an address class in the Instruction class to

View File

@ -14,6 +14,7 @@
#include "lldb/API/SBFrame.h"
#include "lldb/API/SBStream.h"
#include "lldb/API/SBStructuredData.h"
#include "lldb/API/SBTarget.h"
#include "lldb/Core/Disassembler.h"
#include "lldb/Core/EmulateInstruction.h"
@ -26,6 +27,7 @@
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/DataBufferHeap.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/StructuredData.h"
#include <memory>
@ -163,7 +165,8 @@ const char *SBInstruction::GetComment(SBTarget target) {
return ConstString(inst_sp->GetComment(&exe_ctx)).GetCString();
}
lldb::InstructionControlFlowKind SBInstruction::GetControlFlowKind(lldb::SBTarget target) {
lldb::InstructionControlFlowKind
SBInstruction::GetControlFlowKind(lldb::SBTarget target) {
LLDB_INSTRUMENT_VA(this, target);
lldb::InstructionSP inst_sp(GetOpaque());
@ -348,3 +351,21 @@ bool SBInstruction::TestEmulation(lldb::SBStream &output_stream,
return inst_sp->TestEmulation(output_stream.ref(), test_file);
return false;
}
SBStructuredData SBInstruction::GetVariableAnnotations() {
  LLDB_INSTRUMENT_VA(this);

  // Starts out default-constructed; it is only filled in when there is a
  // usable underlying instruction to query.
  SBStructuredData annotations;

  if (m_opaque_sp && m_opaque_sp->IsValid()) {
    if (lldb::InstructionSP instruction_sp = m_opaque_sp->GetSP()) {
      // Hand the array built by the private Instruction over to the SB wrapper.
      StructuredData::ArraySP annotations_sp =
          instruction_sp->GetVariableAnnotations();
      annotations.m_impl_up->SetObjectSP(annotations_sp);
    }
  }

  return annotations;
}

View File

@ -1189,6 +1189,47 @@ uint32_t Instruction::GetData(DataExtractor &data) {
return m_opcode.GetData(data);
}
StructuredData::ArraySP Instruction::GetVariableAnnotations() {
  // Collect the annotations that apply to this instruction.
  VariableAnnotator annotator;
  const std::vector<VariableAnnotation> annotations =
      annotator.AnnotateStructured(*this);

  // Convert each annotation into a dictionary and append it to the result.
  auto result_sp = std::make_shared<StructuredData::Array>();
  for (const VariableAnnotation &entry : annotations) {
    auto entry_sp = std::make_shared<StructuredData::Dictionary>();

    // Stores `value` under `key` as an unsigned integer item.
    const auto add_unsigned = [&entry_sp](const char *key, uint64_t value) {
      entry_sp->AddItem(
          key, std::make_shared<StructuredData::UnsignedInteger>(value));
    };

    entry_sp->AddStringItem("variable_name", entry.variable_name);
    entry_sp->AddStringItem("location_description",
                            entry.location_description);

    // The address keys are only emitted when the annotation carries a range.
    if (entry.address_range.has_value()) {
      const auto &range = *entry.address_range;
      const auto start_address = range.GetBaseAddress().GetFileAddress();
      add_unsigned("start_address", start_address);
      // The end is the start plus the size of the range.
      add_unsigned("end_address", start_address + range.GetByteSize());
    }

    add_unsigned("register_kind", entry.register_kind);

    // Declaration and type details are optional.
    if (entry.decl_file.has_value())
      entry_sp->AddStringItem("decl_file", *entry.decl_file);
    if (entry.decl_line.has_value())
      add_unsigned("decl_line", *entry.decl_line);
    if (entry.type_name.has_value())
      entry_sp->AddStringItem("type_name", *entry.type_name);

    result_sp->AddItem(entry_sp);
  }

  return result_sp;
}
InstructionList::InstructionList() : m_instructions() {}
InstructionList::~InstructionList() = default;

View File

@ -116,3 +116,100 @@ class TestVariableAnnotationsDisassembler(TestBase):
print(out)
self.assertRegex(out, r"\b(i|argc)\s*=\s*(DW_OP_reg\d+\b|R[A-Z0-9]+)")
self.assertNotIn("<decoding error>", out)
@no_debug_info_test
@skipIf(archs=no_match(["x86_64"]))
def test_structured_annotations_api(self):
    """Test SBInstruction.variable_annotations() Python API.

    Reads up to 16 instructions starting at `main` in d_original_example.o
    and checks that every annotation returned by variable_annotations()
    has the required keys with the expected types and sane values, and
    that all expected variables show up at least once.
    """
    obj = self._build_obj("d_original_example.o")
    target = self._create_target(obj)

    main_symbols = target.FindSymbols("main")
    self.assertTrue(
        main_symbols.IsValid() and main_symbols.GetSize() > 0,
        "Could not find 'main' symbol",
    )

    main_symbol = main_symbols.GetContextAtIndex(0).GetSymbol()
    start_addr = main_symbol.GetStartAddress()
    self.assertTrue(start_addr.IsValid(), "Invalid start address for main")

    instructions = target.ReadInstructions(start_addr, 16)
    self.assertGreater(instructions.GetSize(), 0, "No instructions read")

    if self.TraceOn():
        print(
            f"\nTesting SBInstruction.variable_annotations on {instructions.GetSize()} instructions"
        )

    expected_vars = ["argc", "argv", "i"]

    # Keys every annotation must carry: (key, Python type, name used in the
    # failure message).
    required_fields = (
        ("variable_name", str, "string"),
        ("location_description", str, "string"),
        ("start_address", int, "integer"),
        ("end_address", int, "integer"),
        ("register_kind", int, "integer"),
    )

    # Names of all variables seen in any annotation so far.
    found_variables = set()

    # Test each instruction.
    for i in range(instructions.GetSize()):
        inst = instructions.GetInstructionAtIndex(i)
        self.assertTrue(inst.IsValid(), f"Invalid instruction at index {i}")

        # Get annotations as Python list of dicts.
        annotations = inst.variable_annotations()

        for ann in annotations:
            # Validate required fields are present.
            for field, _, _ in required_fields:
                self.assertIn(field, ann, f"Missing '{field}' field")

            # Validate types.
            for field, field_type, type_label in required_fields:
                self.assertIsInstance(
                    ann[field], field_type, f"{field} should be {type_label}"
                )

            var_name = ann["variable_name"]

            # Validate values.
            self.assertGreater(
                len(var_name), 0, "variable_name should not be empty"
            )
            self.assertGreater(
                len(ann["location_description"]),
                0,
                "location_description should not be empty",
            )
            self.assertGreater(
                ann["end_address"],
                ann["start_address"],
                "end_address should be > start_address",
            )

            self.assertIn(
                var_name, expected_vars, f"Unexpected variable name: {var_name}"
            )

            found_variables.add(var_name)

    # Validate we find all expected variables.
    self.assertEqual(
        found_variables,
        set(expected_vars),
        f"Did not find all expected variables. Expected: {expected_vars}, found: {found_variables}",
    )

    if self.TraceOn():
        print(f"\nTest complete. All expected variables found: {found_variables}")