**Context**

Follow-up to [#147460](https://github.com/llvm/llvm-project/pull/147460), which added the ability to surface register-resident variable locations.

This PR moves the annotation logic out of `Instruction::Dump()` and into `Disassembler::PrintInstructions()`, and adds lightweight state tracking so we only print changes at range starts and when variables go out of scope.

---

## What this does

While iterating the instructions for a function, we maintain a “live variable map” keyed by `lldb::user_id_t` (the `Variable`’s ID) to remember each variable’s last emitted location string. For each instruction:

- **New (or newly visible) variable** → print `name = <location>` once at the start of its DWARF location range, cache it.
- **Location changed** (e.g., DWARF range switched to a different register/const) → print the updated mapping.
- **Out of scope** (was tracked previously but not found for the current PC) → print `name = <undef>` and drop it.

This produces **concise, stateful annotations** that highlight variable lifetime transitions without spamming every line.

---

## Why in `PrintInstructions()`?

- Keeps `Instruction` stateless and avoids changing the `Instruction::Dump()` virtual API.
- Makes it straightforward to diff state across instructions (`prev → current`) inside the single driver loop.

---

## How it works (high-level)

1. For the current PC, get in-scope variables via `StackFrame::GetInScopeVariableList(/*get_parent=*/true)`.
2. For each `Variable`, query `DWARFExpressionList::GetExpressionEntryAtAddress(func_load_addr, current_pc)` (added in #144238).
3. If the entry exists, call `DumpLocation(..., eDescriptionLevelBrief, abi)` to get a short, ABI-aware location string (e.g., `DW_OP_reg3 RBX → RBX`).
4. Compare against the last emitted location in the live map:
   - If not present → emit `name = <location>` and record it.
   - If different → emit updated mapping and record it.
5. After processing current in-scope variables, compute the set difference vs. the previous map and emit `name = <undef>` for any that disappeared.

Internally:

- We respect file↔load address translation already provided by `DWARFExpressionList`.
- We reuse the ABI to map LLVM register numbers to arch register names.

---

## Example output (x86_64, simplified)

```
-> 0x55c6f5f6a140 <+0>: cmpl $0x2, %edi ; argc = RDI, argv = RSI
   0x55c6f5f6a143 <+3>: jl 0x55c6f5f6a176 ; <+54> at d_original_example.c:6:3
   0x55c6f5f6a145 <+5>: pushq %r15
   0x55c6f5f6a147 <+7>: pushq %r14
   0x55c6f5f6a149 <+9>: pushq %rbx
   0x55c6f5f6a14a <+10>: movq %rsi, %rbx
   0x55c6f5f6a14d <+13>: movl %edi, %r14d
   0x55c6f5f6a150 <+16>: movl $0x1, %r15d ; argc = R14
   0x55c6f5f6a156 <+22>: nopw %cs:(%rax,%rax) ; i = R15, argv = RBX
   0x55c6f5f6a160 <+32>: movq (%rbx,%r15,8), %rdi
   0x55c6f5f6a164 <+36>: callq 0x55c6f5f6a030 ; symbol stub for: puts
   0x55c6f5f6a169 <+41>: incq %r15
   0x55c6f5f6a16c <+44>: cmpq %r15, %r14
   0x55c6f5f6a16f <+47>: jne 0x55c6f5f6a160 ; <+32> at d_original_example.c:5:10
   0x55c6f5f6a171 <+49>: popq %rbx ; i = <undef>
   0x55c6f5f6a172 <+50>: popq %r14 ; argv = RSI
   0x55c6f5f6a174 <+52>: popq %r15 ; argc = RDI
   0x55c6f5f6a176 <+54>: xorl %eax, %eax
   0x55c6f5f6a178 <+56>: retq
```

Only transitions are shown: the start of a location, changes, and end-of-lifetime.

---

## Scope & limitations (by design)

- Handles **simple locations** first (registers, const-in-register cases surfaced by `DumpLocation`).
- **Memory/composite locations** are out of scope for this PR.
- Annotations appear **only at range boundaries** (start/change/end) to minimize noise.
- Output is **target-independent**; register names come from the target ABI.

## Implementation notes

- All annotation printing now happens in `Disassembler::PrintInstructions()`.
- Uses `std::unordered_map<lldb::user_id_t, std::string>` as the live map.
- No persistent state across calls; the map is rebuilt while walking instruction by instruction.
- **No changes** to the `Instruction` interface.

---

## Requested feedback

- Placement and wording of the `<undef>` marker.
- Whether we should optionally gate this behind a setting (currently always on when disassembling with an `ExecutionContext`).
- Preference for immediate inclusion of tests vs. follow-up patch.

---

Thanks for reviewing! Happy to adjust behavior/format based on feedback.

---------

Co-authored-by: Jonas Devlieghere <jonas@devlieghere.com>
Co-authored-by: Adrian Prantl <adrian.prantl@gmail.com>
290 lines
14 KiB
ArmAsm
290 lines
14 KiB
ArmAsm
/* Original C (for context):
 * __attribute__((noinline))
 * int main(int argc, char **argv) {
 *   int i = argc;                   // i in a reg (DW_OP_regN)
 *   asm volatile("" :: "r"(i));     // keep i live here
 *   i = 0;                          // i becomes const 0 (DW_OP_consts 0, DW_OP_stack_value)
 *   asm volatile("" :: "r"(i));     // keep the const range materialized
 *   return 0;                       // i ends -> <undef> after its range
 * }
 */

# x86-64 assembly, AT&T syntax (operands are `src, dst`; registers carry '%').
        .file   "seed_reg_const_undef.c"
        .text
        .globl  main                            # -- Begin function main
        .p2align 4
        .type   main,@function
#-----------------------------------------------------------------------
# int main(int argc, char **argv)
# In:    %edi = argc, %rsi = argv (see the DEBUG_VALUE notes below)
# Out:   %eax = 0
# Stack: pushes %rbp on entry and pops it before returning; %rbp is the
#        CFA register in between.
#
# The labels .Ltmp0 and .Ltmp1 delimit the location ranges of local `i`
# that the .debug_loclists section of this file encodes, so the
# instruction layout between them must not change.
#-----------------------------------------------------------------------
main:                                   # @main
.Lfunc_begin0:
        .file   0 "." "seed_reg_const_undef.c" md5 0x5e8dbf089d1bd72d395da802210b3138
        .loc    0 3 0                           # seed_reg_const_undef.c:3:0
        .cfi_startproc
# %bb.0:                                # %entry
        #DEBUG_VALUE: main:argc <- $edi
        #DEBUG_VALUE: main:argv <- $rsi
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
.Ltmp0:                                 # start of the range where i is in a register
        #DEBUG_VALUE: main:i <- $edi
        .loc    0 5 3 prologue_end              # seed_reg_const_undef.c:5:3
        #APP
        #NO_APP
.Ltmp1:                                 # register range of i ends; constant-0 range starts
        #DEBUG_VALUE: main:i <- 0
        .loc    0 7 3                           # seed_reg_const_undef.c:7:3
        xorl    %eax, %eax                      # %eax = 0; presumably the "r"(i) operand of the empty asm block below
        #APP
        #NO_APP
        .loc    0 8 3                           # seed_reg_const_undef.c:8:3
        xorl    %eax, %eax                      # %eax = 0, the value still in place at retq
        .loc    0 8 3 epilogue_begin is_stmt 0  # seed_reg_const_undef.c:8:3
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Ltmp2:
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
        .cfi_endproc
                                        # -- End function
# DWARF 5 location-list table with a single list (offset entry 0), which
# the DW_TAG_variable abbreviation in this file reaches via DW_FORM_loclistx.
# The list has two offset-pair entries, with offsets computed from
# .Lfunc_begin0:
#   .Ltmp0 .. .Ltmp1      -> DW_OP_reg5
#   .Ltmp1 .. .Lfunc_end0 -> DW_OP_consts 0, DW_OP_stack_value
        .section        .debug_loclists,"",@progbits
        .long   .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length
.Ldebug_list_header_start0:
        .short  5                               # Version
        .byte   8                               # Address size
        .byte   0                               # Segment selector size
        .long   1                               # Offset entry count
.Lloclists_table_base0:
        .long   .Ldebug_loc0-.Lloclists_table_base0
.Ldebug_loc0:
        # Entry 1: value held in a register.
        .byte   4                               # DW_LLE_offset_pair
        .uleb128 .Ltmp0-.Lfunc_begin0           #   starting offset
        .uleb128 .Ltmp1-.Lfunc_begin0           #   ending offset
        .byte   1                               # Loc expr size
        .byte   85                              # super-register DW_OP_reg5
        # Entry 2: value is the constant 0 (not stored anywhere).
        .byte   4                               # DW_LLE_offset_pair
        .uleb128 .Ltmp1-.Lfunc_begin0           #   starting offset
        .uleb128 .Lfunc_end0-.Lfunc_begin0      #   ending offset
        .byte   3                               # Loc expr size
        .byte   17                              # DW_OP_consts
        .byte   0                               # 0
        .byte   159                             # DW_OP_stack_value
        .byte   0                               # DW_LLE_end_of_list
.Ldebug_list_header_end0:
# DWARF abbreviation table. Declares six abbreviations (codes 1-6); each is
# a tag, a has-children flag, and (attribute, form) pairs ended by two zero
# bytes. A final zero byte ends the table.
        .section        .debug_abbrev,"",@progbits
        # --- Abbrev 1: compile unit ---
        .byte   1                               # Abbreviation Code
        .byte   17                              # DW_TAG_compile_unit
        .byte   1                               # DW_CHILDREN_yes
        .byte   37                              # DW_AT_producer
        .byte   37                              # DW_FORM_strx1
        .byte   19                              # DW_AT_language
        .byte   5                               # DW_FORM_data2
        .byte   3                               # DW_AT_name
        .byte   37                              # DW_FORM_strx1
        .byte   114                             # DW_AT_str_offsets_base
        .byte   23                              # DW_FORM_sec_offset
        .byte   16                              # DW_AT_stmt_list
        .byte   23                              # DW_FORM_sec_offset
        .byte   27                              # DW_AT_comp_dir
        .byte   37                              # DW_FORM_strx1
        .byte   17                              # DW_AT_low_pc
        .byte   27                              # DW_FORM_addrx
        .byte   18                              # DW_AT_high_pc
        .byte   6                               # DW_FORM_data4
        .byte   115                             # DW_AT_addr_base
        .byte   23                              # DW_FORM_sec_offset
        .ascii  "\214\001"                      # DW_AT_loclists_base
        .byte   23                              # DW_FORM_sec_offset
        .byte   0                               # EOM(1)
        .byte   0                               # EOM(2)
        # --- Abbrev 2: subprogram ---
        .byte   2                               # Abbreviation Code
        .byte   46                              # DW_TAG_subprogram
        .byte   1                               # DW_CHILDREN_yes
        .byte   17                              # DW_AT_low_pc
        .byte   27                              # DW_FORM_addrx
        .byte   18                              # DW_AT_high_pc
        .byte   6                               # DW_FORM_data4
        .byte   64                              # DW_AT_frame_base
        .byte   24                              # DW_FORM_exprloc
        .byte   122                             # DW_AT_call_all_calls
        .byte   25                              # DW_FORM_flag_present
        .byte   3                               # DW_AT_name
        .byte   37                              # DW_FORM_strx1
        .byte   58                              # DW_AT_decl_file
        .byte   11                              # DW_FORM_data1
        .byte   59                              # DW_AT_decl_line
        .byte   11                              # DW_FORM_data1
        .byte   39                              # DW_AT_prototyped
        .byte   25                              # DW_FORM_flag_present
        .byte   73                              # DW_AT_type
        .byte   19                              # DW_FORM_ref4
        .byte   63                              # DW_AT_external
        .byte   25                              # DW_FORM_flag_present
        .byte   0                               # EOM(1)
        .byte   0                               # EOM(2)
        # --- Abbrev 3: formal parameter with an inline location expression ---
        .byte   3                               # Abbreviation Code
        .byte   5                               # DW_TAG_formal_parameter
        .byte   0                               # DW_CHILDREN_no
        .byte   2                               # DW_AT_location
        .byte   24                              # DW_FORM_exprloc
        .byte   3                               # DW_AT_name
        .byte   37                              # DW_FORM_strx1
        .byte   58                              # DW_AT_decl_file
        .byte   11                              # DW_FORM_data1
        .byte   59                              # DW_AT_decl_line
        .byte   11                              # DW_FORM_data1
        .byte   73                              # DW_AT_type
        .byte   19                              # DW_FORM_ref4
        .byte   0                               # EOM(1)
        .byte   0                               # EOM(2)
        # --- Abbrev 4: variable whose location is a location-list index ---
        .byte   4                               # Abbreviation Code
        .byte   52                              # DW_TAG_variable
        .byte   0                               # DW_CHILDREN_no
        .byte   2                               # DW_AT_location
        .byte   34                              # DW_FORM_loclistx
        .byte   3                               # DW_AT_name
        .byte   37                              # DW_FORM_strx1
        .byte   58                              # DW_AT_decl_file
        .byte   11                              # DW_FORM_data1
        .byte   59                              # DW_AT_decl_line
        .byte   11                              # DW_FORM_data1
        .byte   73                              # DW_AT_type
        .byte   19                              # DW_FORM_ref4
        .byte   0                               # EOM(1)
        .byte   0                               # EOM(2)
        # --- Abbrev 5: base type ---
        .byte   5                               # Abbreviation Code
        .byte   36                              # DW_TAG_base_type
        .byte   0                               # DW_CHILDREN_no
        .byte   3                               # DW_AT_name
        .byte   37                              # DW_FORM_strx1
        .byte   62                              # DW_AT_encoding
        .byte   11                              # DW_FORM_data1
        .byte   11                              # DW_AT_byte_size
        .byte   11                              # DW_FORM_data1
        .byte   0                               # EOM(1)
        .byte   0                               # EOM(2)
        # --- Abbrev 6: pointer type ---
        .byte   6                               # Abbreviation Code
        .byte   15                              # DW_TAG_pointer_type
        .byte   0                               # DW_CHILDREN_no
        .byte   73                              # DW_AT_type
        .byte   19                              # DW_FORM_ref4
        .byte   0                               # EOM(1)
        .byte   0                               # EOM(2)
        .byte   0                               # EOM(3)
# DWARF 5 compile unit describing main(argc, argv) and its local `i`.
# Each DIE starts with an abbreviation code from .debug_abbrev; the
# "Abbrev [n] 0xOFF:0xLEN" notes give the DIE's unit-relative offset and size.
# DW_AT_type values (84, 88, 93, 98) are unit-relative offsets of the type
# DIEs at 0x54, 0x58, 0x5d and 0x62 further down.
        .section        .debug_info,"",@progbits
.Lcu_begin0:
        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
        .short  5                               # DWARF version number
        .byte   1                               # DWARF Unit Type
        .byte   8                               # Address Size (in bytes)
        .long   .debug_abbrev                   # Offset Into Abbrev. Section
        .byte   1                               # Abbrev [1] 0xc:0x5b DW_TAG_compile_unit
        .byte   0                               # DW_AT_producer
        .short  29                              # DW_AT_language
        .byte   1                               # DW_AT_name
        .long   .Lstr_offsets_base0             # DW_AT_str_offsets_base
        .long   .Lline_table_start0             # DW_AT_stmt_list
        .byte   2                               # DW_AT_comp_dir
        .byte   0                               # DW_AT_low_pc
        .long   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
        .long   .Laddr_table_base0              # DW_AT_addr_base
        .long   .Lloclists_table_base0          # DW_AT_loclists_base
        .byte   2                               # Abbrev [2] 0x27:0x2d DW_TAG_subprogram
        .byte   0                               # DW_AT_low_pc
        .long   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
        .byte   1                               # DW_AT_frame_base
        .byte   86                              #   DW_OP_reg6 (0x56)
                                        # DW_AT_call_all_calls
        .byte   3                               # DW_AT_name
        .byte   0                               # DW_AT_decl_file
        .byte   3                               # DW_AT_decl_line
                                        # DW_AT_prototyped
        .long   84                              # DW_AT_type
                                        # DW_AT_external
        .byte   3                               # Abbrev [3] 0x36:0xa DW_TAG_formal_parameter
        .byte   1                               # DW_AT_location
        .byte   85                              #   DW_OP_reg5 (0x55)
        .byte   5                               # DW_AT_name
        .byte   0                               # DW_AT_decl_file
        .byte   3                               # DW_AT_decl_line
        .long   84                              # DW_AT_type
        .byte   3                               # Abbrev [3] 0x40:0xa DW_TAG_formal_parameter
        .byte   1                               # DW_AT_location
        .byte   84                              #   DW_OP_reg4 (0x54)
        .byte   6                               # DW_AT_name
        .byte   0                               # DW_AT_decl_file
        .byte   3                               # DW_AT_decl_line
        .long   88                              # DW_AT_type
        .byte   4                               # Abbrev [4] 0x4a:0x9 DW_TAG_variable
        .byte   0                               # DW_AT_location
        .byte   8                               # DW_AT_name
        .byte   0                               # DW_AT_decl_file
        .byte   4                               # DW_AT_decl_line
        .long   84                              # DW_AT_type
        .byte   0                               # End Of Children Mark
        .byte   5                               # Abbrev [5] 0x54:0x4 DW_TAG_base_type
        .byte   4                               # DW_AT_name
        .byte   5                               # DW_AT_encoding
        .byte   4                               # DW_AT_byte_size
        .byte   6                               # Abbrev [6] 0x58:0x5 DW_TAG_pointer_type
        .long   93                              # DW_AT_type
        .byte   6                               # Abbrev [6] 0x5d:0x5 DW_TAG_pointer_type
        .long   98                              # DW_AT_type
        .byte   5                               # Abbrev [5] 0x62:0x4 DW_TAG_base_type
        .byte   7                               # DW_AT_name
        .byte   6                               # DW_AT_encoding
        .byte   1                               # DW_AT_byte_size
        .byte   0                               # End Of Children Mark
.Ldebug_info_end0:
# String tables. .debug_str_offsets is emitted in two parts: its header
# here, then (after the .debug_str strings) one 4-byte entry per string.
# The DW_FORM_strx1 attributes in .debug_info are indices 0-8 into those
# entries. Length 40 = 4 bytes (version + following 2-byte field) plus
# 9 entries * 4 bytes.
        .section        .debug_str_offsets,"",@progbits
        .long   40                              # Length of String Offsets Set
        .short  5                               # version
        .short  0                               # 2-byte field after the version (zero)
.Lstr_offsets_base0:
        .section        .debug_str,"MS",@progbits,1
.Linfo_string0:
        .asciz  "clang version 22.0.0git (https://github.com/UltimateForce21/llvm-project.git 79c0a9e1e7da0f727c41d27c9c6ff8a28bb7d06f)" # string offset=0
.Linfo_string1:
        .asciz  "seed_reg_const_undef.c"        # string offset=119
.Linfo_string2:
        .asciz  "."                             # string offset=142
.Linfo_string3:
        .asciz  "main"                          # string offset=144
.Linfo_string4:
        .asciz  "int"                           # string offset=149
.Linfo_string5:
        .asciz  "argc"                          # string offset=153
.Linfo_string6:
        .asciz  "argv"                          # string offset=158
.Linfo_string7:
        .asciz  "char"                          # string offset=163
.Linfo_string8:
        .asciz  "i"                             # string offset=168
        # Offset entries, index 0 through 8, in the same order as the strings.
        .section        .debug_str_offsets,"",@progbits
        .long   .Linfo_string0
        .long   .Linfo_string1
        .long   .Linfo_string2
        .long   .Linfo_string3
        .long   .Linfo_string4
        .long   .Linfo_string5
        .long   .Linfo_string6
        .long   .Linfo_string7
        .long   .Linfo_string8
# Address table with one entry, .Lfunc_begin0. The DW_FORM_addrx value 0
# used for DW_AT_low_pc in .debug_info is index 0 into this table.
        .section        .debug_addr,"",@progbits
        .long   .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
.Ldebug_addr_start0:
        .short  5                               # DWARF version number
        .byte   8                               # Address size
        .byte   0                               # Segment selector size
.Laddr_table_base0:
        .quad   .Lfunc_begin0
.Ldebug_addr_end0:
        .ident  "clang version 22.0.0git (https://github.com/UltimateForce21/llvm-project.git 79c0a9e1e7da0f727c41d27c9c6ff8a28bb7d06f)"
        .section        ".note.GNU-stack","",@progbits
        .addrsig
        # .debug_line holds only the label that DW_AT_stmt_list points at.
        .section        .debug_line,"",@progbits
.Lline_table_start0: