From 6912b91891c403c944cccb119102724eedcc04b9 Mon Sep 17 00:00:00 2001 From: Daniil Fukalov Date: Fri, 30 Jan 2026 02:11:07 +0100 Subject: [PATCH] [AMDGPU] Fix crash in SIWholeQuadMode with debug instructions. (#178282) The prepareInsertion function was crashing when debug instructions appeared at positions being queried for slot indices. Debug instructions don't have entries in the slot index map, so getInstructionIndex would fail with an assertion. Fixes SWDEV-480902. --- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 17 ++- llvm/test/CodeGen/AMDGPU/wqm-debug-instr.mir | 117 +++++++++++++++++++ 2 files changed, 128 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/wqm-debug-instr.mir diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 0452dea982d4..5fd0c1e1064c 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -1104,10 +1104,15 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion( LiveRange &LR = LIS->getRegUnit(*TRI->regunits(MCRegister::from(AMDGPU::SCC)).begin()); auto MBBE = MBB.end(); - SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First) - : LIS->getMBBEndIdx(&MBB); - SlotIndex LastIdx = - Last != MBBE ? LIS->getInstructionIndex(*Last) : LIS->getMBBEndIdx(&MBB); + // Skip debug instructions when getting slot indices, as they don't have + // entries in the slot index map. + auto FirstNonDbg = skipDebugInstructionsForward(First, MBBE); + auto LastNonDbg = skipDebugInstructionsForward(Last, MBBE); + SlotIndex FirstIdx = FirstNonDbg != MBBE + ? LIS->getInstructionIndex(*FirstNonDbg) + : LIS->getMBBEndIdx(&MBB); + SlotIndex LastIdx = LastNonDbg != MBBE ? LIS->getInstructionIndex(*LastNonDbg) + : LIS->getMBBEndIdx(&MBB); SlotIndex Idx = PreferLast ? LastIdx : FirstIdx; const LiveRange::Segment *S; @@ -1124,8 +1129,8 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion( } else { MachineInstr *EndMI = LIS->getInstructionFromIndex(S->end.getBaseIndex()); assert(EndMI && "Segment does not end on valid instruction"); - auto NextI = std::next(EndMI->getIterator()); - if (NextI == MBB.end()) + auto NextI = next_nodbg(EndMI->getIterator(), MBB.instr_end()); + if (NextI == MBB.instr_end()) break; SlotIndex Next = LIS->getInstructionIndex(*NextI); if (Next > LastIdx) diff --git a/llvm/test/CodeGen/AMDGPU/wqm-debug-instr.mir b/llvm/test/CodeGen/AMDGPU/wqm-debug-instr.mir new file mode 100644 index 000000000000..6c646a2fd553 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/wqm-debug-instr.mir @@ -0,0 +1,117 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s + +# Test that si-wqm correctly handles debug instructions when computing +# insertion points for SCC save/restore. Debug instructions don't have +# slot indices, so they must be skipped when querying LiveIntervals. + +--- +# Test case 1: Debug instruction at the First position (start of region +# requiring mode change). The pass must skip debug instructions when +# getting slot indices to avoid assertion failure in SlotIndexes. +name: test_debug_instr_at_first +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $m0 + + ; CHECK-LABEL: name: test_debug_instr_at_first + ; CHECK: liveins: $vgpr0, $vgpr1, $m0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[ENTER_STRICT_WQM:%[0-9]+]]:sreg_64 = ENTER_STRICT_WQM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_PARAM_LOAD:%[0-9]+]]:vgpr_32 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec + ; CHECK-NEXT: DBG_VALUE $noreg, $noreg + ; CHECK-NEXT: DBG_VALUE $noreg, $noreg + ; CHECK-NEXT: $exec = EXIT_STRICT_WQM [[ENTER_STRICT_WQM]] + ; CHECK-NEXT: S_CMP_LT_I32 0, 1, implicit-def $scc + ; CHECK-NEXT: dead [[S_CSELECT_B32_:%[0-9]+]]:sgpr_32 = S_CSELECT_B32 0, 1, implicit $scc + ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0:vreg_64 = COPY [[DS_PARAM_LOAD]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub1:vreg_64 = COPY [[COPY1]] + ; CHECK-NEXT: [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY2]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) + ; CHECK-NEXT: $vgpr0 = COPY [[IMAGE_SAMPLE_V4_V2_]].sub0 + ; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = COPY $vgpr1 + %2:sgpr_256 = IMPLICIT_DEF + %3:sgpr_128 = IMPLICIT_DEF + ; DS_PARAM_LOAD requires WQM + %4:vgpr_32 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec + ; Debug instructions after WQM instruction - become First for mode transition + DBG_VALUE $noreg, $noreg + DBG_VALUE $noreg, $noreg + ; Scalar compare uses SCC - causes transition from WQM to Exact + S_CMP_LT_I32 0, 1, implicit-def $scc + %5:sgpr_32 = S_CSELECT_B32 0, 1, implicit $scc + ; IMAGE_SAMPLE requires WQM - transition back to WQM + undef %6.sub0:vreg_64 = COPY %4 + %6.sub1:vreg_64 = COPY %1 + %7:vreg_128 = IMAGE_SAMPLE_V4_V2 %6, %2, %3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) + $vgpr0 = COPY %7.sub0 + SI_RETURN_TO_EPILOG $vgpr0 + +... +--- +# Test case 2: Multiple debug instructions after SCC use (S_CSELECT). +# When iterating through SCC live range segments, NextI (instruction after +# segment end) may be a debug instruction followed by more debug instructions. +# The pass must skip all debug instructions to find a valid slot index. +name: test_multiple_debug_after_scc_use +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_multiple_debug_after_scc_use + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: S_CMP_LT_I32 0, [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; CHECK-NEXT: [[ENTER_STRICT_WWM1:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: $scc = COPY [[COPY4]] + ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], [[COPY]], implicit-def $vcc, implicit $exec + ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sgpr_32 = S_CSELECT_B32 [[COPY1]], [[COPY2]], implicit $scc + ; CHECK-NEXT: DBG_VALUE $noreg, $noreg + ; CHECK-NEXT: DBG_VALUE $noreg, $noreg + ; CHECK-NEXT: DBG_VALUE $noreg, $noreg + ; CHECK-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_CSELECT_B32_]], [[V_ADD_CO_U32_e32_]], implicit-def $vcc, implicit $exec + ; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM1]] + ; CHECK-NEXT: early-clobber $vgpr0 = V_MOV_B32_e32 [[V_ADD_CO_U32_e32_1]], implicit $exec + ; CHECK-NEXT: $vgpr1 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr2 + %2:sgpr_32 = COPY $sgpr1 + %3:sgpr_32 = COPY $sgpr0 + %4:sgpr_128 = IMPLICIT_DEF + + bb.1: + S_CMP_LT_I32 0, %3:sgpr_32, implicit-def $scc + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, %4:sgpr_128, 0, 0, 0, 0, implicit $exec + %6:vgpr_32 = V_ADD_CO_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit-def $vcc, implicit $exec + %7:sgpr_32 = S_CSELECT_B32 %1:sgpr_32, %2:sgpr_32, implicit $scc + ; Multiple debug instructions after SCC use + DBG_VALUE $noreg, $noreg + DBG_VALUE $noreg, $noreg + DBG_VALUE $noreg, $noreg + %8:vgpr_32 = V_ADD_CO_U32_e32 %7:sgpr_32, %6:vgpr_32, implicit-def $vcc, implicit $exec + $vgpr0 = STRICT_WWM %8:vgpr_32, implicit $exec + $vgpr1 = COPY %5:vgpr_32 + SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 + +...