[AMDGPU] Fix crash in SIWholeQuadMode with debug instructions. (#178282)

The prepareInsertion function was crashing when debug instructions
appeared at positions being queried for slot indices. Debug instructions
don't have entries in the slot index map, so getInstructionIndex would
fail with an assertion.

Fixes SWDEV-480902.
This commit is contained in:
Daniil Fukalov 2026-01-30 02:11:07 +01:00 committed by GitHub
parent 469a6fd185
commit 6912b91891
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 128 additions and 6 deletions

View File

@ -1104,10 +1104,15 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
LiveRange &LR =
LIS->getRegUnit(*TRI->regunits(MCRegister::from(AMDGPU::SCC)).begin());
auto MBBE = MBB.end();
SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
: LIS->getMBBEndIdx(&MBB);
SlotIndex LastIdx =
Last != MBBE ? LIS->getInstructionIndex(*Last) : LIS->getMBBEndIdx(&MBB);
// Skip debug instructions when getting slot indices, as they don't have
// entries in the slot index map.
auto FirstNonDbg = skipDebugInstructionsForward(First, MBBE);
auto LastNonDbg = skipDebugInstructionsForward(Last, MBBE);
SlotIndex FirstIdx = FirstNonDbg != MBBE
? LIS->getInstructionIndex(*FirstNonDbg)
: LIS->getMBBEndIdx(&MBB);
SlotIndex LastIdx = LastNonDbg != MBBE ? LIS->getInstructionIndex(*LastNonDbg)
: LIS->getMBBEndIdx(&MBB);
SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
const LiveRange::Segment *S;
@ -1124,8 +1129,8 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
} else {
MachineInstr *EndMI = LIS->getInstructionFromIndex(S->end.getBaseIndex());
assert(EndMI && "Segment does not end on valid instruction");
auto NextI = std::next(EndMI->getIterator());
if (NextI == MBB.end())
auto NextI = next_nodbg(EndMI->getIterator(), MBB.instr_end());
if (NextI == MBB.instr_end())
break;
SlotIndex Next = LIS->getInstructionIndex(*NextI);
if (Next > LastIdx)

View File

@ -0,0 +1,117 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
# Test that si-wqm correctly handles debug instructions when computing
# insertion points for SCC save/restore. Debug instructions don't have
# slot indices, so they must be skipped when querying LiveIntervals.
---
# Test case 1: Debug instruction at the First position (start of region
# requiring mode change). The pass must skip debug instructions when
# getting slot indices to avoid assertion failure in SlotIndexes.
name: test_debug_instr_at_first
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $m0
; CHECK-LABEL: name: test_debug_instr_at_first
; CHECK: liveins: $vgpr0, $vgpr1, $m0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[ENTER_STRICT_WQM:%[0-9]+]]:sreg_64 = ENTER_STRICT_WQM -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_PARAM_LOAD:%[0-9]+]]:vgpr_32 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
; CHECK-NEXT: DBG_VALUE $noreg, $noreg
; CHECK-NEXT: DBG_VALUE $noreg, $noreg
; CHECK-NEXT: $exec = EXIT_STRICT_WQM [[ENTER_STRICT_WQM]]
; CHECK-NEXT: S_CMP_LT_I32 0, 1, implicit-def $scc
; CHECK-NEXT: dead [[S_CSELECT_B32_:%[0-9]+]]:sgpr_32 = S_CSELECT_B32 0, 1, implicit $scc
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:vreg_64 = COPY [[DS_PARAM_LOAD]]
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1:vreg_64 = COPY [[COPY1]]
; CHECK-NEXT: [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY2]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
; CHECK-NEXT: $vgpr0 = COPY [[IMAGE_SAMPLE_V4_V2_]].sub0
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:sgpr_256 = IMPLICIT_DEF
%3:sgpr_128 = IMPLICIT_DEF
; DS_PARAM_LOAD requires strict WQM (the pass wraps it in ENTER_STRICT_WQM/EXIT_STRICT_WQM)
%4:vgpr_32 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
; Debug instructions after WQM instruction - become First for mode transition
DBG_VALUE $noreg, $noreg
DBG_VALUE $noreg, $noreg
; Scalar compare defines SCC (read by the S_CSELECT_B32 below) - causes transition from strict WQM to Exact
S_CMP_LT_I32 0, 1, implicit-def $scc
%5:sgpr_32 = S_CSELECT_B32 0, 1, implicit $scc
; IMAGE_SAMPLE requires WQM - transition back to WQM
undef %6.sub0:vreg_64 = COPY %4
%6.sub1:vreg_64 = COPY %1
%7:vreg_128 = IMAGE_SAMPLE_V4_V2 %6, %2, %3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
$vgpr0 = COPY %7.sub0
SI_RETURN_TO_EPILOG $vgpr0
...
---
# Test case 2: Multiple debug instructions after SCC use (S_CSELECT).
# When iterating through SCC live range segments, NextI (instruction after
# segment end) may be a debug instruction followed by more debug instructions.
# The pass must skip all debug instructions to find a valid slot index.
name: test_multiple_debug_after_scc_use
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test_multiple_debug_after_scc_use
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: S_CMP_LT_I32 0, [[COPY3]], implicit-def $scc
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY $scc
; CHECK-NEXT: [[ENTER_STRICT_WWM1:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: $scc = COPY [[COPY4]]
; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], [[COPY]], implicit-def $vcc, implicit $exec
; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sgpr_32 = S_CSELECT_B32 [[COPY1]], [[COPY2]], implicit $scc
; CHECK-NEXT: DBG_VALUE $noreg, $noreg
; CHECK-NEXT: DBG_VALUE $noreg, $noreg
; CHECK-NEXT: DBG_VALUE $noreg, $noreg
; CHECK-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_CSELECT_B32_]], [[V_ADD_CO_U32_e32_]], implicit-def $vcc, implicit $exec
; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM1]]
; CHECK-NEXT: early-clobber $vgpr0 = V_MOV_B32_e32 [[V_ADD_CO_U32_e32_1]], implicit $exec
; CHECK-NEXT: $vgpr1 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_32 = COPY $sgpr2
%2:sgpr_32 = COPY $sgpr1
%3:sgpr_32 = COPY $sgpr0
%4:sgpr_128 = IMPLICIT_DEF
bb.1:
S_CMP_LT_I32 0, %3:sgpr_32, implicit-def $scc
%5:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, %4:sgpr_128, 0, 0, 0, 0, implicit $exec
%6:vgpr_32 = V_ADD_CO_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit-def $vcc, implicit $exec
%7:sgpr_32 = S_CSELECT_B32 %1:sgpr_32, %2:sgpr_32, implicit $scc
; Multiple debug instructions after SCC use
DBG_VALUE $noreg, $noreg
DBG_VALUE $noreg, $noreg
DBG_VALUE $noreg, $noreg
%8:vgpr_32 = V_ADD_CO_U32_e32 %7:sgpr_32, %6:vgpr_32, implicit-def $vcc, implicit $exec
$vgpr0 = STRICT_WWM %8:vgpr_32, implicit $exec
$vgpr1 = COPY %5:vgpr_32
SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
...