`insertSimulatedTrap` was returning `HaltLoopBB` when the trap was the last instruction of a block with no successors. Because `HaltLoopBB` is appended to the end of the function, `FinalizeISel` would jump there and skip any intermediate blocks, leaving their pseudos unexpanded.

Fix by returning `MBB.getNextNode()` unconditionally:
- After `splitAt()`: `getNextNode()` returns the split-off block (`ContBB`).
- No split, `MBB` in the middle: `getNextNode()` returns the next original block.
- No split, `MBB` was last: `getNextNode()` returns `HaltLoopBB` (just pushed).

Because we always `push_back(HaltLoopBB)` before returning, `getNextNode()` can never be `nullptr`: if `MBB` was the last block, `HaltLoopBB` now follows it.

Fixes: SWDEV-572407
57 lines
2.2 KiB
LLVM
57 lines
2.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s
|
|
|
|
; Declaration of the trap intrinsic called from %trap_block in the kernel below.
declare void @llvm.trap()
|
|
|
|
; Regression test for trap lowering when the trap sits in a block that has no
; successors (%trap_block ends in `unreachable`) and another block
; (%normal_path) comes after it in the IR.
;
; The autogenerated assertions below pin the complete expected output:
;   * %normal_path: two scalar loads, a 64-bit shift, an add-with-carry pair
;     and a 64-bit flat store, followed by s_endpgm;
;   * %trap_block: s_trap 2, the doorbell / interrupt sendmsg sequence, and a
;     self-branching s_sethalt loop at .LBB0_3.
;
; NOTE(review): presumably the 64-bit address arithmetic in %normal_path is the
; part that depends on a pseudo being expanded after the trap is lowered --
; confirm against the accompanying code change.
define amdgpu_kernel void @simulated_trap_pseudo_expand(i64 %offset, i1 %should_continue, ptr %base) {
|
|
; CHECK-LABEL: simulated_trap_pseudo_expand:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
|
|
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
|
|
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
|
|
; CHECK-NEXT: s_cbranch_vccz .LBB0_2
|
|
; CHECK-NEXT: ; %bb.1: ; %normal_path
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
|
|
; CHECK-NEXT: s_load_b64 s[2:3], s[4:5], 0x10
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_lshl_b64 s[0:1], s[0:1], 3
|
|
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
|
; CHECK-NEXT: s_add_u32 s0, s2, s0
|
|
; CHECK-NEXT: s_addc_u32 s1, s3, s1
|
|
; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, s1
|
|
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; CHECK-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, v0
|
|
; CHECK-NEXT: flat_store_b64 v[2:3], v[0:1]
|
|
; CHECK-NEXT: s_endpgm
|
|
; CHECK-NEXT: .LBB0_2: ; %trap_block
|
|
; CHECK-NEXT: s_trap 2
|
|
; CHECK-NEXT: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
|
|
; CHECK-NEXT: s_mov_b32 ttmp2, m0
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_and_b32 s0, s0, 0x3ff
|
|
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; CHECK-NEXT: s_bitset1_b32 s0, 10
|
|
; CHECK-NEXT: s_mov_b32 m0, s0
|
|
; CHECK-NEXT: s_sendmsg sendmsg(MSG_INTERRUPT)
|
|
; CHECK-NEXT: s_mov_b32 m0, ttmp2
|
|
; CHECK-NEXT: .LBB0_3: ; =>This Inner Loop Header: Depth=1
|
|
; CHECK-NEXT: s_sethalt 5
|
|
; CHECK-NEXT: s_branch .LBB0_3
|
|
; Branch on the i1 kernel argument: true continues to %normal_path, false
; goes to the trapping block.
entry:
|
|
br i1 %should_continue, label %normal_path, label %trap_block
|
|
|
|
; The trap call is followed directly by `unreachable`, so this block has no
; successors.
trap_block:
|
|
call void @llvm.trap()
|
|
unreachable
|
|
|
|
; Stores 0.0 to the double at index %offset from %base, then returns.
normal_path:
|
|
%ptr = getelementptr double, ptr %base, i64 %offset
|
|
store double 0.0, ptr %ptr, align 8
|
|
ret void
|
|
}
|
|
|