llvm-project/llvm/test/CodeGen/AMDGPU/simulated-trap-pseudo-expand.ll
Dark Steve 9429a1e809
[AMDGPU] Fix insertSimulatedTrap to return correct continuation block (#174774)
`insertSimulatedTrap` was returning `HaltLoopBB` when the trap was in a
block with no successors and was the last instruction. Since
`HaltLoopBB` gets appended to the end of the function, `FinalizeISel`
would jump there and skip any intermediate blocks, leaving their pseudos
unexpanded.

Fix by returning `MBB.getNextNode()` unconditionally:
- After `splitAt()`: `getNextNode()` returns the split-off block
(`ContBB`)
- No split, `MBB` in middle: `getNextNode()` returns the next original
block
- No split, `MBB` was last: `getNextNode()` returns `HaltLoopBB` (just
pushed)

Since we always `push_back(HaltLoopBB)` before returning,
`getNextNode()` can never be `nullptr`: if `MBB` was the last block,
`HaltLoopBB` is now after it.

Fixes: SWDEV-572407
2026-01-21 11:52:38 +05:30

57 lines
2.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s
;
; Regression test: a kernel containing a call to llvm.trap is compiled for
; gfx1100, and the assertions verify that every block is lowered to machine
; instructions, including a block that is listed after the trap block in the
; IR. The intrinsic declared below is the trap whose expansion is exercised.
declare void @llvm.trap()
; Kernel with two exits: a trapping path and a normal path that stores the
; double 0.0 to the element of %base selected by %offset.
;   %offset          - element index used by the getelementptr on %base
;   %should_continue - true selects %normal_path, false selects %trap_block
;   %base            - pointer the store address is computed from
; The expected output below shows %normal_path emitted as real instructions
; ending in s_endpgm, and %trap_block emitted as s_trap 2 followed by a
; doorbell/interrupt message sequence and a self-branching s_sethalt loop.
define amdgpu_kernel void @simulated_trap_pseudo_expand(i64 %offset, i1 %should_continue, ptr %base) {
; CHECK-LABEL: simulated_trap_pseudo_expand:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_load_b32 s0, s[4:5], 0x8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
; CHECK-NEXT: s_cbranch_vccz .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %normal_path
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; CHECK-NEXT: s_load_b64 s[2:3], s[4:5], 0x10
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b64 s[0:1], s[0:1], 3
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; CHECK-NEXT: s_add_u32 s0, s2, s0
; CHECK-NEXT: s_addc_u32 s1, s3, s1
; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, s1
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; CHECK-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, v0
; CHECK-NEXT: flat_store_b64 v[2:3], v[0:1]
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .LBB0_2: ; %trap_block
; CHECK-NEXT: s_trap 2
; CHECK-NEXT: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
; CHECK-NEXT: s_mov_b32 ttmp2, m0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_and_b32 s0, s0, 0x3ff
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; CHECK-NEXT: s_bitset1_b32 s0, 10
; CHECK-NEXT: s_mov_b32 m0, s0
; CHECK-NEXT: s_sendmsg sendmsg(MSG_INTERRUPT)
; CHECK-NEXT: s_mov_b32 m0, ttmp2
; CHECK-NEXT: .LBB0_3: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_sethalt 5
; CHECK-NEXT: s_branch .LBB0_3
entry:
; Two-way split on the kernel argument: continue normally or trap.
br i1 %should_continue, label %normal_path, label %trap_block
; NOTE(review): %trap_block is listed before %normal_path. Presumably this
; ordering is what leaves another block after the trap block when the trap is
; expanded, which is the situation under test - confirm before reordering.
trap_block:
; The trap is the last real operation here; the block has no successors.
call void @llvm.trap()
unreachable
normal_path:
; Address of the %offset-th double from %base, then an 8-byte aligned store.
%ptr = getelementptr double, ptr %base, i64 %offset
store double 0.0, ptr %ptr, align 8
ret void
}