
andorbitset.ll is interesting since it directly depends on the difference between poison and undef. Not sure it's useful to keep the version using poison; I assume none of this code makes it to codegen. si-spill-cf.ll was also a nasty case, which I doubt has been reproducing its original issue for a very long time. I had to reclaim an older version, replace some of the poison uses, and run simplify-cfg. There's a very slight change in the final CFG with this, but the final output is approximately the same as it used to be.
106 lines
3.8 KiB
LLVM
106 lines
3.8 KiB
LLVM
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -S -passes=structurizecfg,si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s

; annotate_unreachable_noloop: a kernel with two conditional branches, no
; loop, and `unreachable` at the end of every path.
;   * opt invocations: no call to the llvm.amdgcn.loop intrinsic may appear in
;     the output for this function.
;   * llc invocation: an s_cbranch_scc1 must be emitted, and no s_endpgm may
;     appear between it and the .Lfunc_end0 label.

; OPT-LABEL: @annotate_unreachable_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_unreachable_noloop:
; GCN: s_cbranch_scc1
; GCN-NOT: s_endpgm
; GCN: .Lfunc_end0
define amdgpu_kernel void @annotate_unreachable_noloop(ptr addrspace(1) noalias nocapture readonly %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %bb
  ; Load the <4 x float> entry of %arg indexed by the workitem id.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
  %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
  ; The first branch has no real condition; it uses a poison placeholder.
  br i1 poison, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  ; Second branch: taken to %bb4 when lane 2 of the loaded vector is < 0.0.
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  unreachable

bb5:                                              ; preds = %bb3, %bb1
  unreachable
}


; annotate_ret_noloop: same two-branch, loop-free shape as the function above,
; but both exits are `ret void` and the first branch uses a real comparison.
;   * opt invocations: no call to the llvm.amdgcn.loop intrinsic may appear in
;     the output for this function.
;   * llc invocation: expects, in order, a load_dwordx4, a v_cmp_nlt_f32 and an
;     s_and_saveexec_b64, with s_endpgm on the very next line after the
;     saveexec, followed later by the .Lfunc_end label.

; OPT-LABEL: @annotate_ret_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_ret_noloop:
; GCN: load_dwordx4
; GCN: v_cmp_nlt_f32
; GCN: s_and_saveexec_b64
; GCN-NEXT: s_endpgm
; GCN: .Lfunc_end
define amdgpu_kernel void @annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %bb
  ; Load the <4 x float> entry of %arg indexed by the workitem id.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
  %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
  %tmp5 = extractelement <4 x float> %tmp4, i32 1
  ; Volatile store of the loaded vector to a placeholder (poison) address.
  store volatile <4 x float> %tmp4, ptr addrspace(1) poison
  ; First branch: taken to %bb5 when lane 1 of the loaded vector is > 1.0.
  %cmp = fcmp ogt float %tmp5, 1.0
  br i1 %cmp, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  ; Second branch: taken to %bb4 when lane 2 of the loaded vector is < 0.0.
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3, %bb1
  ret void
}

; uniform_annotate_ret_noloop: variant of the `ret void` test in which the
; load index is the kernel argument %tmp rather than the workitem id
; (NOTE(review): presumably this is what makes the control flow uniform, as
; the function name suggests -- confirm).
;   * opt invocations: no call to the llvm.amdgcn.loop intrinsic may appear in
;     the output for this function.
;   * llc invocation: expects an s_cbranch_scc1, then an s_endpgm, then the
;     .Lfunc_end label.

; OPT-LABEL: @uniform_annotate_ret_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}uniform_annotate_ret_noloop:
; GCN: s_cbranch_scc1
; GCN: s_endpgm
; GCN: .Lfunc_end
define amdgpu_kernel void @uniform_annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg, i32 %tmp) #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %bb
  ; Load the <4 x float> entry of %arg indexed by the kernel argument %tmp.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
  %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
  ; Despite its name, %undef is a frozen poison: freeze yields an arbitrary
  ; but fixed i1, so the branch below does not branch directly on poison.
  %undef = freeze i1 poison
  br i1 %undef, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  ; Second branch: taken to %bb4 when lane 2 of the loaded vector is < 0.0.
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3, %bb1
  ret void
}


; Intrinsic called by the kernels above to obtain the workitem id in x.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to the kernels defined in this file, #1 to the intrinsic
; declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }