
andorbitset.ll is interesting since it directly depends on the difference between poison and undef. Not sure it's useful to keep the version using poison; I assume none of this code makes it to codegen. si-spill-cf.ll was also a nasty case, which I doubt has been reproducing its original issue for a very long time. I had to reclaim an older version, replace some of the poison uses, and run simplify-cfg. There's a very slight change in the final CFG with this, but the final output is approximately the same as it used to be.
157 lines
5.6 KiB
YAML
157 lines
5.6 KiB
YAML
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s

# If there's a base offset, check that SILoadStoreOptimizer creates
# V_ADD_CO_U32_e64 (VI) or V_ADD_U32_e64 (GFX9) for that offset; _e64 uses a
# vreg for the carry (rather than $vcc, which is used in _e32); this ensures
# that $vcc is not inadvertently clobbered.

# GCN-LABEL: name: ds_combine_base_offset{{$}}

# VI: V_ADD_CO_U32_e64 %6, %0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
# VI: V_ADD_CO_U32_e64 %10, %3,
# VI-NEXT: DS_READ2_B32 killed %11, 16, 24,

# GFX9: V_ADD_U32_e64 %6, %0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 16, 24,

--- |
  ; Embedded LLVM IR module. It supplies the IR values %tmp, %tmp1, %tmp2 and
  ; %tmp3 that the memory operands of the machine functions in this file name
  ; as %ir.tmp, %ir.tmp1, %ir.tmp2 and %ir.tmp3.

  ; addrspace(3) float array that every getelementptr below indexes into.
  @0 = internal unnamed_addr addrspace(3) global [256 x float] poison, align 4

  ; All three functions have the same shape: bb.0 branches to bb2, bb2 forms
  ; pointers to elements 0, 8, 16 and 24 of @0 and branches to bb1, and bb1
  ; returns.
  define amdgpu_kernel void @ds_combine_base_offset() {
  bb.0:
    br label %bb2

  bb1:
    ret void

  bb2:
    %tmp = getelementptr inbounds [256 x float], ptr addrspace(3) @0, i32 0, i32 0
    %tmp1 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 8
    %tmp2 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 16
    %tmp3 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 24
    br label %bb1
  }

  define amdgpu_kernel void @ds_combine_base_offset_subreg() {
  bb.0:
    br label %bb2

  bb1:
    ret void

  bb2:
    %tmp = getelementptr inbounds [256 x float], ptr addrspace(3) @0, i32 0, i32 0
    %tmp1 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 8
    %tmp2 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 16
    %tmp3 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 24
    br label %bb1
  }

  define amdgpu_kernel void @ds_combine_subreg() {
  bb.0:
    br label %bb2

  bb1:
    ret void

  bb2:
    %tmp = getelementptr inbounds [256 x float], ptr addrspace(3) @0, i32 0, i32 0
    %tmp1 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 8
    %tmp2 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 16
    %tmp3 = getelementptr inbounds float, ptr addrspace(3) %tmp, i32 24
    br label %bb1
  }
---
# Input for the ds_combine_base_offset checks. The two stores use offsets 1024
# and 1056, the two loads use offsets 1088 and 1120, and $vcc is written before
# the DS instructions and read after them.
name:            ds_combine_base_offset
body:             |
  bb.0:
    %0:vgpr_32 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    ; $vcc is defined here, ahead of the DS instructions, and is read again by
    ; the S_AND_B64 / S_CBRANCH_VCCNZ at the end of the block.
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    ; Store pair: both use %0 as their first (address) operand, at offsets
    ; 1024 and 1056.
    DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp)
    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp1)
    ; Load pair: both use %3 as their address operand, at offsets 1088 and
    ; 1120.
    %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp3)
    ; Use of the $vcc value defined above; both branches target bb.1.
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...

# GCN-LABEL: name: ds_combine_base_offset_subreg{{$}}

# VI: V_ADD_CO_U32_e64 %6, %0.sub0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8,
# VI: V_ADD_CO_U32_e64 %10, %3.sub0,
# VI-NEXT: DS_READ2_B32 killed %11, 16, 24,

# GFX9: V_ADD_U32_e64 %6, %0.sub0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0.sub0, %3.sub0, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3.sub0,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 16, 24,
---
# Same instruction sequence and offsets (1024, 1056, 1088, 1120) as
# ds_combine_base_offset, except that %0 and %3 are vreg_64 values and the DS
# instructions use their sub0 subregisters.
name:            ds_combine_base_offset_subreg
body:             |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    ; $vcc is defined here, ahead of the DS instructions, and is read again by
    ; the S_AND_B64 / S_CBRANCH_VCCNZ at the end of the block.
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    ; Store pair: both use %0.sub0 as their first (address) operand, at
    ; offsets 1024 and 1056.
    DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp)
    %3:vreg_64 = V_LSHLREV_B64_e64 0, 0, implicit $exec
    DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp1)
    ; Load pair: both use %3.sub0 as their address operand, at offsets 1088
    ; and 1120.
    %4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp3)
    ; Use of the $vcc value defined above; both branches target bb.1.
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...

# GCN-LABEL: name: ds_combine_subreg{{$}}

# VI: DS_WRITE2_B32 %0.sub0, %0.sub0, %3.sub0, 0, 8,
# VI: DS_READ2_B32 %3.sub0, 0, 8,

# GFX9: DS_WRITE2_B32_gfx9 %0.sub0, %0.sub0, %3.sub0, 0, 8,
# GFX9: DS_READ2_B32_gfx9 %3.sub0, 0, 8,
---
# Subregister variant with small offsets: the stores and the loads each use
# offsets 0 and 32. The check lines above expect the merged instructions to
# take %0.sub0 / %3.sub0 directly as their address operand.
name:            ds_combine_subreg
body:             |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    ; $vcc is defined here, ahead of the DS instructions, and is read again by
    ; the S_AND_B64 / S_CBRANCH_VCCNZ at the end of the block.
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    ; Store pair: both use %0.sub0 as their first (address) operand, at
    ; offsets 0 and 32.
    DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp)
    %3:vreg_64 = V_LSHLREV_B64_e64 0, 0, implicit $exec
    DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp1)
    ; Load pair: both use %3.sub0 as their address operand, at offsets 0 and
    ; 32.
    %4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp3)
    ; Use of the $vcc value defined above; both branches target bb.1.
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...
|