si-annotate-control-flow does depth first traversal of BB's of a function to insert amdgcn if intrinsics for conditional branches so that isel can generate correct instructions later. si-annotate-control-flow checks whether the successor BB for the 'else' branch of a conditional branch has been visited. If it has been visited, si-annotate-control-flow assumes the conditional branch has been handled and will not try to insert if intrinsic for it. This assumption is not correct when the IR contains multiple unreachable BB's. Then 'if' intrinscs are not inserted and incorrect ISA are generated. This patch fixes the issue by let amdgpu-unify-divergent-exit-nodes unify unreachables even if they are uniformly reached. In this way the IR will not contain multiple exits, and structurizer is able to structurize the IR containing one unified exit. Reviewed by: Ruiling Song, Matt Arsenault Differential Revision: https://reviews.llvm.org/D131181 Fixes: SWDEV-343244
63 lines
2.2 KiB
LLVM
63 lines
2.2 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
|
|
define void @test() #1 {
|
|
; Clean up the unreachable blocks introduced with LowerSwitch pass.
|
|
; This test ensures that, in the pass flow, UnreachableBlockElim pass
|
|
; follows the LowerSwitch. Otherwise, this testcase will crash
|
|
; immediately after the instruction selection due to the incomplete
|
|
; PHI node in an MBB whose incoming values were never codegenerated.
|
|
;
|
|
; GCN-LABEL: name: test
|
|
; GCN: bb.{{[0-9]+}}.entry:
|
|
; GCN: bb.{{[0-9]+}}.Flow1:
|
|
; GCN: bb.{{[0-9]+}}.entry.true.blk:
|
|
; GCN: bb.{{[0-9]+}}.entry.false.blk:
|
|
; GCN: bb.{{[0-9]+}}.switch.blk:
|
|
|
|
; GCN-NOT: bb.{{[0-9]+}}.preheader.blk
|
|
; GCN-NOT: bb.{{[0-9]+}}.pre.false.blk:
|
|
; GCN-NOT: bb.{{[0-9]+}}.unreach.blk:
|
|
; GCN-NOT: PHI
|
|
|
|
; GCN: bb.{{[0-9]+}}.Flow:
|
|
; GCN: bb.{{[0-9]+}}.UnifiedReturnBlock:
|
|
entry:
|
|
%idx = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
|
br i1 undef, label %entry.true.blk, label %entry.false.blk
|
|
|
|
entry.true.blk: ; preds = %entry
|
|
%exit.cmp = icmp ult i32 %idx, 3
|
|
br i1 %exit.cmp, label %switch.blk, label %exit
|
|
|
|
entry.false.blk: ; preds = %entry
|
|
unreachable
|
|
|
|
switch.blk: ; preds = %entry.true.blk
|
|
switch i32 %idx, label %preheader.blk [
|
|
i32 0, label %exit
|
|
i32 1, label %exit
|
|
i32 2, label %exit
|
|
]
|
|
|
|
preheader.blk: ; preds = %switch.blk
|
|
%pre.exit = icmp ult i32 %idx, 5
|
|
br i1 %pre.exit, label %unreach.blk, label %pre.false.blk
|
|
|
|
pre.false.blk: ; preds = %preheader.blk
|
|
%call.pre.false = tail call i32 @func(i32 %idx) #0
|
|
br label %unreach.blk
|
|
|
|
unreach.blk: ; preds = %preheader.blk, %pre.false.blk
|
|
%phi.val = phi i32 [ %call.pre.false, %pre.false.blk ], [ undef, %preheader.blk ]
|
|
store i32 %phi.val, i32* undef
|
|
unreachable
|
|
|
|
exit: ; preds = %switch.blk
|
|
ret void
|
|
}
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
|
declare i32 @func(i32)#0
|
|
|
|
attributes #0 = { nounwind readnone }
|
|
attributes #1 = { nounwind }
|