AMDGPU: Add a few missing mfma rewrite tests (#152434)
Test other splitting situations that appear in greedy. This includes ensuring we have a case that hits a local split and instruction split (most of the tests hit the region split path). Also test a few cases where the final result isn't fully used, resulting in partial copy bundles instead of a simple full copy. Test physreg and virtreg agpr interference with a reassignment candidate. I'm accumulating too many failure cases, and MIR tests are very prone to painful merge conflicts, so I've added a few more tests and extracted new tests from #147975. Closes #149026
This commit is contained in:
parent
a3e0685529
commit
f44d8d583c
@ -20,11 +20,32 @@
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_second() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first_physreg() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_second_physreg() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-wave-limiter"="true" "amdgpu-waves-per-eu"="8,8" }
|
||||
attributes #1 = { "amdgpu-wave-limiter"="true" "amdgpu-waves-per-eu"="10,10" }
|
||||
...
|
||||
|
||||
# Inflate pattern, except the defining instruction isn't an MFMA.
|
||||
@ -403,6 +424,89 @@ body: |
|
||||
|
||||
...
|
||||
|
||||
# Non-mac variant, src2 is a physical register
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 10
|
||||
sgprForEXECCopy: '$sgpr100_sgpr101'
|
||||
body: |
|
||||
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_physreg_src2
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
|
||||
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
|
||||
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
|
||||
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
|
||||
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
S_NOP 0, implicit-def $agpr0
|
||||
renamable $sgpr0 = S_MOV_B32 0
|
||||
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
|
||||
renamable $sgpr1 = COPY renamable $sgpr0
|
||||
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
|
||||
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
%0.sub9:vreg_512_align2 = COPY %0.sub8
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
|
||||
%0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
; No VGPRs available for %0
|
||||
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# Non-mac variant, src2 is the same VGPR, but a different subregister.
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
|
||||
@ -489,3 +593,423 @@ body: |
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# There isn't an assignable AGPR around the first MFMA.
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 10
|
||||
sgprForEXECCopy: '$sgpr100_sgpr101'
|
||||
body: |
|
||||
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
|
||||
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
|
||||
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
|
||||
; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
|
||||
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit-def renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit-def renamable $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47, implicit-def renamable $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit killed renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit killed renamable $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47, implicit killed renamable $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
S_NOP 0, implicit-def $agpr0
|
||||
renamable $sgpr0 = S_MOV_B32 0
|
||||
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
|
||||
renamable $sgpr1 = COPY renamable $sgpr0
|
||||
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
|
||||
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
%0.sub9:vreg_512_align2 = COPY %0.sub8
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
|
||||
undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
S_NOP 0, implicit-def %6:areg_512_align2, implicit-def %7:areg_512_align2, implicit-def %8:areg_512_align2, implicit-def %9:areg_512_align2
|
||||
%3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_NOP 0, implicit %6, implicit %7, implicit %8, implicit %9
|
||||
%4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
; No VGPRs available for %0 or %4
|
||||
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# There isn't an assignable AGPR around the second MFMA.
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_second
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 10
|
||||
sgprForEXECCopy: '$sgpr100_sgpr101'
|
||||
body: |
|
||||
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_second
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
|
||||
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
|
||||
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
|
||||
; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
|
||||
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit-def renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit-def renamable $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47, implicit-def renamable $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit killed renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit killed renamable $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47, implicit killed renamable $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
S_NOP 0, implicit-def $agpr0
|
||||
renamable $sgpr0 = S_MOV_B32 0
|
||||
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
|
||||
renamable $sgpr1 = COPY renamable $sgpr0
|
||||
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
|
||||
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
%0.sub9:vreg_512_align2 = COPY %0.sub8
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
|
||||
undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
%3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_NOP 0, implicit-def %6:areg_512_align2, implicit-def %7:areg_512_align2, implicit-def %8:areg_512_align2, implicit-def %9:areg_512_align2
|
||||
%4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_NOP 0, implicit %6, implicit %7, implicit %8, implicit %9
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
; No VGPRs available for %0 or %4
|
||||
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# There isn't an assignable AGPR around the first MFMA, with physreg interference
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first_physreg
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 10
|
||||
sgprForEXECCopy: '$sgpr100_sgpr101'
|
||||
body: |
|
||||
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first_physreg
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
|
||||
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
|
||||
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
|
||||
; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
|
||||
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
S_NOP 0, implicit-def $agpr0
|
||||
renamable $sgpr0 = S_MOV_B32 0
|
||||
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
|
||||
renamable $sgpr1 = COPY renamable $sgpr0
|
||||
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
|
||||
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
%0.sub9:vreg_512_align2 = COPY %0.sub8
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
|
||||
undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
S_NOP 0, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
S_NOP 0, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
S_NOP 0, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
|
||||
S_NOP 0, implicit-def $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
S_NOP 0, implicit-def $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
|
||||
S_NOP 0, implicit-def $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
|
||||
S_NOP 0, implicit-def $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
|
||||
S_NOP 0, implicit-def $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
%3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_NOP 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
S_NOP 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
S_NOP 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
|
||||
S_NOP 0, implicit $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
S_NOP 0, implicit $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
|
||||
S_NOP 0, implicit $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
|
||||
S_NOP 0, implicit $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
|
||||
S_NOP 0, implicit $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
%4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
; No VGPRs available for %0 or %4
|
||||
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# There isn't an assignable AGPR around the second MFMA, physreg interference
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_second_physreg
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 10
|
||||
sgprForEXECCopy: '$sgpr100_sgpr101'
|
||||
body: |
|
||||
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_second_physreg
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
|
||||
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
|
||||
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
|
||||
; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
|
||||
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: liveins: $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35:0x00000000FFFFFFFF
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
S_NOP 0, implicit-def $agpr0
|
||||
renamable $sgpr0 = S_MOV_B32 0
|
||||
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
|
||||
renamable $sgpr1 = COPY renamable $sgpr0
|
||||
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
|
||||
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
%0.sub9:vreg_512_align2 = COPY %0.sub8
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
|
||||
undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %2:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
%3:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_NOP 0, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
S_NOP 0, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
S_NOP 0, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
|
||||
S_NOP 0, implicit-def $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
S_NOP 0, implicit-def $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
|
||||
S_NOP 0, implicit-def $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
|
||||
S_NOP 0, implicit-def $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
|
||||
S_NOP 0, implicit-def $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
%4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_NOP 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
S_NOP 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
S_NOP 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23
|
||||
S_NOP 0, implicit $agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
S_NOP 0, implicit $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39
|
||||
S_NOP 0, implicit $agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47
|
||||
S_NOP 0, implicit $agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55
|
||||
S_NOP 0, implicit $agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
; No VGPRs available for %0 or %4
|
||||
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %5, %4.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user