[AMDGPU] Generate some WQM/WWM tests (NFC) (#152635)
Update llvm.amdgcn.kill.ll and wqm.mir to be generated. This is preparatory work for refactoring of the WQM/WWM pass.
This commit is contained in:
parent
2d4bac8675
commit
0bdd312b1d
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o - %s | FileCheck %s
|
||||
|
||||
@ -46,10 +47,6 @@
|
||||
|
||||
---
|
||||
# Check for awareness that s_or_saveexec_b64 clobbers SCC
|
||||
#
|
||||
#CHECK: ENTER_STRICT_WWM
|
||||
#CHECK: S_CMP_LT_I32
|
||||
#CHECK: S_CSELECT_B32
|
||||
name: test_strict_wwm_scc
|
||||
alignment: 1
|
||||
exposesReturnsTwice: false
|
||||
@ -80,6 +77,21 @@ body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
|
||||
|
||||
; CHECK-LABEL: name: test_strict_wwm_scc
|
||||
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; CHECK-NEXT: S_CMP_LT_I32 0, [[COPY3]], implicit-def $scc
|
||||
; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], [[COPY]], implicit-def $vcc, implicit $exec
|
||||
; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sgpr_32 = S_CSELECT_B32 [[COPY1]], [[COPY2]], implicit $scc
|
||||
; CHECK-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_CSELECT_B32_]], [[V_ADD_CO_U32_e32_]], implicit-def $vcc, implicit $exec
|
||||
; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
|
||||
; CHECK-NEXT: early-clobber $vgpr0 = V_MOV_B32_e32 [[V_ADD_CO_U32_e32_1]], implicit $exec
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%3 = COPY $vgpr0
|
||||
%2 = COPY $sgpr2
|
||||
%1 = COPY $sgpr1
|
||||
@ -96,16 +108,35 @@ body: |
|
||||
---
|
||||
# Second test for awareness that s_or_saveexec_b64 clobbers SCC
|
||||
# Because entry block is treated differently.
|
||||
#
|
||||
#CHECK: %bb.1
|
||||
#CHECK: S_CMP_LT_I32
|
||||
#CHECK: COPY $scc
|
||||
#CHECK: ENTER_STRICT_WWM
|
||||
#CHECK: $scc = COPY
|
||||
#CHECK: S_CSELECT_B32
|
||||
name: test_strict_wwm_scc2
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_strict_wwm_scc2
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: S_CMP_LT_I32 0, [[COPY3]], implicit-def $scc
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY $scc
|
||||
; CHECK-NEXT: [[ENTER_STRICT_WWM1:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: $scc = COPY [[COPY4]]
|
||||
; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], [[COPY]], implicit-def $vcc, implicit $exec
|
||||
; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sgpr_32 = S_CSELECT_B32 [[COPY1]], [[COPY2]], implicit $scc
|
||||
; CHECK-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_CSELECT_B32_]], [[V_ADD_CO_U32_e32_]], implicit-def $vcc, implicit $exec
|
||||
; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM1]]
|
||||
; CHECK-NEXT: early-clobber $vgpr0 = V_MOV_B32_e32 [[V_ADD_CO_U32_e32_1]], implicit $exec
|
||||
; CHECK-NEXT: $vgpr1 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
|
||||
|
||||
@ -130,7 +161,6 @@ body: |
|
||||
---
|
||||
# V_SET_INACTIVE, when its second operand is undef, is replaced by a
|
||||
# COPY by si-wqm. Ensure the instruction is removed.
|
||||
#CHECK-NOT: V_SET_INACTIVE
|
||||
name: no_cfg
|
||||
alignment: 1
|
||||
exposesReturnsTwice: false
|
||||
@ -167,6 +197,28 @@ body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
||||
|
||||
; CHECK-LABEL: name: no_cfg
|
||||
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
|
||||
; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:sgpr_128 = COPY [[REG_SEQUENCE]]
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
|
||||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
|
||||
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY6]], implicit $exec, implicit-def $scc
|
||||
; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], [[COPY7]], 323, 12, 15, 0, implicit $exec
|
||||
; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
|
||||
; CHECK-NEXT: early-clobber %15:vgpr_32 = V_MOV_B32_e32 [[V_MOV_B32_dpp]], implicit $exec
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact %15, [[REG_SEQUENCE]], [[S_MOV_B32_]], 4, 0, 0, implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%3:sgpr_32 = COPY $sgpr3
|
||||
%2:sgpr_32 = COPY $sgpr2
|
||||
%1:sgpr_32 = COPY $sgpr1
|
||||
@ -189,18 +241,32 @@ body: |
|
||||
|
||||
---
|
||||
# Ensure that strict_wwm is not put around an EXEC copy
|
||||
#CHECK-LABEL: name: copy_exec
|
||||
#CHECK: %7:sreg_64 = COPY $exec
|
||||
#CHECK-NEXT: %13:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
#CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
#CHECK-NEXT: $exec = EXIT_STRICT_WWM %13
|
||||
#CHECK-NEXT: %9:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %7.sub0, 0, implicit $exec
|
||||
name: copy_exec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
||||
|
||||
; CHECK-LABEL: name: copy_exec
|
||||
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: dead [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
|
||||
; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
|
||||
; CHECK-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY4]].sub0, 0, implicit $exec
|
||||
; CHECK-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_MBCNT_LO_U32_B32_e64_]], 312, 15, 15, 0, implicit $exec
|
||||
; CHECK-NEXT: dead [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_MOV_B32_dpp]], 63
|
||||
; CHECK-NEXT: early-clobber %12:vgpr_32 = V_MOV_B32_e32 [[V_MOV_B32_e32_]], implicit $exec
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact %12, [[REG_SEQUENCE]], [[S_MOV_B32_]], 4, 0, 0, implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%3:sgpr_32 = COPY $sgpr3
|
||||
%2:sgpr_32 = COPY $sgpr2
|
||||
%1:sgpr_32 = COPY $sgpr1
|
||||
@ -224,20 +290,48 @@ body: |
|
||||
---
|
||||
# Check exit of WQM is still inserted correctly when SCC is live until block end.
|
||||
# Critically, this tests that compilation does not fail.
|
||||
#CHECK-LABEL: name: scc_always_live
|
||||
#CHECK: %8:vreg_128 = IMAGE_SAMPLE_V4_V2 %7
|
||||
#CHECK-NEXT: S_CMP_EQ_U32 %2, 0, implicit-def $scc
|
||||
#CHECK-NEXT: undef %9.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64
|
||||
#CHECK-NEXT: %9.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32
|
||||
#CHECK-NEXT: %14:sreg_32_xm0 = COPY $scc
|
||||
#CHECK-NEXT: $exec = S_AND_B64 $exec, %13, implicit-def $scc
|
||||
#CHECK-NEXT: $scc = COPY %14
|
||||
#CHECK-NEXT: %10:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64
|
||||
#CHECK-NEXT: %11:vreg_128 = IMAGE_SAMPLE_V4_V2
|
||||
#CHECK-NEXT: S_CBRANCH_SCC0 %bb.2
|
||||
name: scc_always_live
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: scc_always_live
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec
|
||||
; CHECK-NEXT: $m0 = COPY $sgpr1
|
||||
; CHECK-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[V_INTERP_P1_F32_:%[0-9]+]]:vgpr_32 = V_INTERP_P1_F32 [[COPY1]], 3, 2, implicit $mode, implicit $m0, implicit $exec
|
||||
; CHECK-NEXT: [[V_INTERP_P1_F32_1:%[0-9]+]]:vgpr_32 = V_INTERP_P1_F32 [[COPY2]], 3, 2, implicit $mode, implicit $m0, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY4:%[0-9]+]].sub0:vreg_64 = COPY [[V_INTERP_P1_F32_]]
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]].sub1:vreg_64 = COPY [[V_INTERP_P1_F32_1]]
|
||||
; CHECK-NEXT: [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY4]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
|
||||
; CHECK-NEXT: S_CMP_EQ_U32 [[COPY3]], 0, implicit-def $scc
|
||||
; CHECK-NEXT: undef [[V_ADD_F32_e64_:%[0-9]+]].sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 0, [[IMAGE_SAMPLE_V4_V2_]].sub0, 0, [[V_INTERP_P1_F32_1]], 1, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]].sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 [[V_INTERP_P1_F32_]], [[V_INTERP_P1_F32_1]], implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY $scc
|
||||
; CHECK-NEXT: $exec = S_AND_B64 $exec, [[COPY]], implicit-def $scc
|
||||
; CHECK-NEXT: $scc = COPY [[COPY5]]
|
||||
; CHECK-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 0, [[V_INTERP_P1_F32_]], 0, [[V_INTERP_P1_F32_1]], 1, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[IMAGE_SAMPLE_V4_V2_1:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[V_ADD_F32_e64_]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
|
||||
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[V_ADD_F32_e64_1]], [[DEF1]], [[S_MOV_B32_]], 4, 0, 0, implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[IMAGE_SAMPLE_V4_V2_]].sub0
|
||||
; CHECK-NEXT: $vgpr1 = COPY [[IMAGE_SAMPLE_V4_V2_]].sub1
|
||||
; CHECK-NEXT: $vgpr2 = COPY [[IMAGE_SAMPLE_V4_V2_1]].sub0
|
||||
; CHECK-NEXT: $vgpr3 = COPY [[IMAGE_SAMPLE_V4_V2_1]].sub1
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||
bb.0:
|
||||
liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2
|
||||
|
||||
@ -281,18 +375,26 @@ body: |
|
||||
---
|
||||
# Check that unnecessary instructions do not get marked for WWM
|
||||
#
|
||||
#CHECK-NOT: ENTER_STRICT_WWM
|
||||
#CHECK: BUFFER_LOAD_DWORDX2
|
||||
#CHECK: ENTER_STRICT_WWM
|
||||
#CHECK: V_SET_INACTIVE_B32
|
||||
#CHECK: V_SET_INACTIVE_B32
|
||||
#CHECK-NOT: ENTER_STRICT_WWM
|
||||
#CHECK: V_MAX
|
||||
name: test_wwm_set_inactive_propagation
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
|
||||
; CHECK-LABEL: name: test_wwm_set_inactive_propagation
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec
|
||||
; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_64_xexec = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]].sub0:vreg_64 = V_SET_INACTIVE_B32 0, [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0, 0, 0, undef [[ENTER_STRICT_WWM]], implicit $exec, implicit-def $scc
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]].sub1:vreg_64 = V_SET_INACTIVE_B32 0, [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1, 0, 0, undef [[ENTER_STRICT_WWM]], implicit $exec, implicit-def $scc
|
||||
; CHECK-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nnan nsz arcp contract reassoc nofpexcept V_MAX_F64_e64 0, [[BUFFER_LOAD_DWORDX2_OFFEN]], 0, [[BUFFER_LOAD_DWORDX2_OFFEN]], 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: $exec = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
|
||||
; CHECK-NEXT: early-clobber $vgpr0 = V_MOV_B32_e32 [[V_MAX_F64_e64_]].sub0, implicit $exec
|
||||
; CHECK-NEXT: early-clobber $vgpr1 = V_MOV_B32_e32 [[V_MAX_F64_e64_]].sub1, implicit $exec
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:vgpr_32 = COPY $vgpr0
|
||||
%2:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN %1:vgpr_32, %0:sgpr_128, 0, 0, 0, 0, implicit $exec
|
||||
@ -308,15 +410,46 @@ body: |
|
||||
---
|
||||
# Check that WQM marking occurs correctly through phi nodes in live range graph.
|
||||
# If not then initial V_MOV will not be in WQM.
|
||||
#
|
||||
#CHECK-LABEL: name: test_wqm_lr_phi
|
||||
#CHECK: COPY $exec
|
||||
#CHECK-NEXT: S_WQM
|
||||
#CHECK-NEXT: V_MOV_B32_e32 -10
|
||||
#CHECK-NEXT: V_MOV_B32_e32 0
|
||||
name: test_wqm_lr_phi
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_wqm_lr_phi
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec
|
||||
; CHECK-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
|
||||
; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 -10, implicit $exec
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: [[S_GETPC_B64_:%[0-9]+]]:sreg_64 = S_GETPC_B64
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[S_GETPC_B64_]], 32, 0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vcc = V_CMP_LT_U32_e64 4, 4, implicit $exec
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_ADD_U32_e32 1, [[V_MOV_B32_e32_]].sub1, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_U32_e32 1, [[V_MOV_B32_e32_]].sub1, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.4
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.4:
|
||||
; CHECK-NEXT: $exec = S_AND_B64 $exec, [[COPY]], implicit-def $scc
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX8_IMM]], [[DEF]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[IMAGE_SAMPLE_V4_V2_]].sub0
|
||||
; CHECK-NEXT: $vgpr1 = COPY [[IMAGE_SAMPLE_V4_V2_]].sub1
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
|
||||
bb.0:
|
||||
undef %0.sub0:vreg_64 = V_MOV_B32_e32 -10, implicit $exec
|
||||
%0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
|
||||
@ -345,14 +478,20 @@ body: |
|
||||
...
|
||||
|
||||
---
|
||||
#CHECK-LABEL: name: no_wqm_in_cs
|
||||
#CHECK-NOT: S_WQM
|
||||
name: no_wqm_in_cs
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: no_wqm_in_cs
|
||||
; CHECK: liveins: $vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr2
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
|
||||
undef %0.sub0:vreg_64 = COPY $vgpr1
|
||||
%0.sub1:vreg_64 = COPY $vgpr2
|
||||
%100:sgpr_256 = IMPLICIT_DEF
|
||||
@ -362,14 +501,20 @@ body: |
|
||||
...
|
||||
|
||||
---
|
||||
#CHECK-LABEL: name: no_wqm_in_es
|
||||
#CHECK-NOT: S_WQM
|
||||
name: no_wqm_in_es
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: no_wqm_in_es
|
||||
; CHECK: liveins: $vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr2
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
|
||||
undef %0.sub0:vreg_64 = COPY $vgpr1
|
||||
%0.sub1:vreg_64 = COPY $vgpr2
|
||||
%100:sgpr_256 = IMPLICIT_DEF
|
||||
@ -379,14 +524,20 @@ body: |
|
||||
...
|
||||
|
||||
---
|
||||
#CHECK-LABEL: name: no_wqm_in_gs
|
||||
#CHECK-NOT: S_WQM
|
||||
name: no_wqm_in_gs
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: no_wqm_in_gs
|
||||
; CHECK: liveins: $vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr2
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
|
||||
undef %0.sub0:vreg_64 = COPY $vgpr1
|
||||
%0.sub1:vreg_64 = COPY $vgpr2
|
||||
%100:sgpr_256 = IMPLICIT_DEF
|
||||
@ -396,14 +547,20 @@ body: |
|
||||
...
|
||||
|
||||
---
|
||||
#CHECK-LABEL: name: no_wqm_in_hs
|
||||
#CHECK-NOT: S_WQM
|
||||
name: no_wqm_in_hs
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: no_wqm_in_hs
|
||||
; CHECK: liveins: $vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr2
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
|
||||
undef %0.sub0:vreg_64 = COPY $vgpr1
|
||||
%0.sub1:vreg_64 = COPY $vgpr2
|
||||
%100:sgpr_256 = IMPLICIT_DEF
|
||||
@ -413,14 +570,20 @@ body: |
|
||||
...
|
||||
|
||||
---
|
||||
#CHECK-LABEL: name: no_wqm_in_ls
|
||||
#CHECK-NOT: S_WQM
|
||||
name: no_wqm_in_ls
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: no_wqm_in_ls
|
||||
; CHECK: liveins: $vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr2
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
|
||||
undef %0.sub0:vreg_64 = COPY $vgpr1
|
||||
%0.sub1:vreg_64 = COPY $vgpr2
|
||||
%100:sgpr_256 = IMPLICIT_DEF
|
||||
@ -430,14 +593,20 @@ body: |
|
||||
...
|
||||
|
||||
---
|
||||
#CHECK-LABEL: name: no_wqm_in_vs
|
||||
#CHECK-NOT: S_WQM
|
||||
name: no_wqm_in_vs
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: no_wqm_in_vs
|
||||
; CHECK: liveins: $vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr2
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead [[IMAGE_SAMPLE_V4_V2_:%[0-9]+]]:vreg_128 = IMAGE_SAMPLE_V4_V2 [[COPY]], [[DEF]], [[DEF1]], 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
|
||||
undef %0.sub0:vreg_64 = COPY $vgpr1
|
||||
%0.sub1:vreg_64 = COPY $vgpr2
|
||||
%100:sgpr_256 = IMPLICIT_DEF
|
||||
|
Loading…
x
Reference in New Issue
Block a user