Add a new event, SCC_WRITE, for s_barrier_signal_isfirst and s_barrier_leave, the instructions that write to SCC; the counter used for this event is KM_CNT. Also start tracking SCC for reads and writes. An s_barrier_wait on the same barrier guarantees that the SCC write from s_barrier_signal_isfirst has landed, so there is no need to insert s_wait_kmcnt in that case.
174 lines
7.1 KiB
YAML
174 lines
7.1 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s
|
|
|
|
---
# Scenario: bb.1 writes SCC with S_BARRIER_SIGNAL_ISFIRST_IMM -1 and bb.2 reads
# SCC through S_CSELECT_B32, with no barrier wait between the write and the read.
# The GFX12 check lines expect S_WAIT_KMCNT 0 at the top of bb.2, ahead of the
# SCC read.
|
|
name: scc_write_in_other_block
|
|
body: |
|
|
; GFX12-LABEL: name: scc_write_in_other_block
|
|
; GFX12: bb.0:
|
|
; GFX12-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
|
|
; GFX12-NEXT: S_WAIT_EXPCNT 0
|
|
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
|
|
; GFX12-NEXT: S_WAIT_BVHCNT 0
|
|
; GFX12-NEXT: S_WAIT_KMCNT 0
|
|
; GFX12-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
|
|
; GFX12-NEXT: V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
|
|
; GFX12-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.1:
|
|
; GFX12-NEXT: successors: %bb.2(0x80000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit killed $scc
|
|
; GFX12-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit killed $scc
|
|
; GFX12-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
; GFX12-NEXT: GLOBAL_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, implicit $exec
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.2:
|
|
; GFX12-NEXT: S_WAIT_KMCNT 0
|
|
; GFX12-NEXT: renamable $sgpr1 = S_CSELECT_B32 10, 20, implicit killed $scc
|
|
; GFX12-NEXT: $vgpr5 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
; GFX12-NEXT: GLOBAL_STORE_DWORD $vgpr6_vgpr7, $vgpr5, 0, 0, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
|
|
V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
|
|
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
|
|
bb.1:
|
|
renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit killed $scc
|
|
S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit killed $scc
|
|
$vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, implicit $exec
|
|
|
|
bb.2:
|
|
renamable $sgpr1 = S_CSELECT_B32 10, 20, implicit killed $scc
|
|
$vgpr5 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr6_vgpr7, $vgpr5, 0, 0, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Scenario: bb.1 writes SCC with S_BARRIER_SIGNAL_ISFIRST_IMM -1, and bb.2 begins
# with S_BARRIER_WAIT -1 (the same barrier id) before reading SCC through
# S_CSELECT_B32. The GFX12 check lines expect the S_CSELECT_B32 to follow the
# S_BARRIER_WAIT directly, with no S_WAIT_KMCNT inserted in bb.2.
|
|
name: scc_write_in_other_block_with_barrier_wait
|
|
body: |
|
|
; GFX12-LABEL: name: scc_write_in_other_block_with_barrier_wait
|
|
; GFX12: bb.0:
|
|
; GFX12-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
|
|
; GFX12-NEXT: S_WAIT_EXPCNT 0
|
|
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
|
|
; GFX12-NEXT: S_WAIT_BVHCNT 0
|
|
; GFX12-NEXT: S_WAIT_KMCNT 0
|
|
; GFX12-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
|
|
; GFX12-NEXT: V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
|
|
; GFX12-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.1:
|
|
; GFX12-NEXT: successors: %bb.2(0x80000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit killed $scc
|
|
; GFX12-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit killed $scc
|
|
; GFX12-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
; GFX12-NEXT: GLOBAL_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, implicit $exec
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.2:
|
|
; GFX12-NEXT: S_BARRIER_WAIT -1
|
|
; GFX12-NEXT: renamable $sgpr1 = S_CSELECT_B32 10, 20, implicit killed $scc
|
|
; GFX12-NEXT: $vgpr5 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
; GFX12-NEXT: GLOBAL_STORE_DWORD $vgpr6_vgpr7, $vgpr5, 0, 0, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
|
|
V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
|
|
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
|
|
bb.1:
|
|
renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit killed $scc
|
|
S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit killed $scc
|
|
$vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, implicit $exec
|
|
|
|
bb.2:
|
|
S_BARRIER_WAIT -1
|
|
renamable $sgpr1 = S_CSELECT_B32 10, 20, implicit killed $scc
|
|
$vgpr5 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr6_vgpr7, $vgpr5, 0, 0, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Scenario: three predecessors of bb.5 each write SCC with
# S_BARRIER_SIGNAL_ISFIRST_IMM, using barrier ids 0 (bb.2), 1 (bb.3) and -1
# (bb.4). bb.5 executes S_BARRIER_WAIT -1 and then reads SCC through
# S_CSELECT_B32. The GFX12 check lines expect S_WAIT_KMCNT 0 between the
# S_BARRIER_WAIT and the SCC read.
# NOTE(review): presumably the wait is kept because the signals on barriers 0
# and 1 are not covered by a wait on barrier -1 - confirm against the pass.
|
|
name: scc_write_in_multiple_blocks_with_barrier_wait
|
|
body: |
|
|
; GFX12-LABEL: name: scc_write_in_multiple_blocks_with_barrier_wait
|
|
; GFX12: bb.0:
|
|
; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
|
|
; GFX12-NEXT: S_WAIT_EXPCNT 0
|
|
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
|
|
; GFX12-NEXT: S_WAIT_BVHCNT 0
|
|
; GFX12-NEXT: S_WAIT_KMCNT 0
|
|
; GFX12-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
|
|
; GFX12-NEXT: V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
|
|
; GFX12-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.1:
|
|
; GFX12-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr1, implicit-def $exec, implicit $exec
|
|
; GFX12-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.2:
|
|
; GFX12-NEXT: successors: %bb.5(0x80000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM 0, implicit-def $scc, implicit killed $scc
|
|
; GFX12-NEXT: S_BRANCH %bb.5
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.3:
|
|
; GFX12-NEXT: successors: %bb.5(0x80000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM 1, implicit-def $scc, implicit killed $scc
|
|
; GFX12-NEXT: S_BRANCH %bb.5
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.4:
|
|
; GFX12-NEXT: successors: %bb.5(0x80000000)
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit killed $scc
|
|
; GFX12-NEXT: {{ $}}
|
|
; GFX12-NEXT: bb.5:
|
|
; GFX12-NEXT: S_BARRIER_WAIT -1
|
|
; GFX12-NEXT: S_WAIT_KMCNT 0
|
|
; GFX12-NEXT: renamable $sgpr1 = S_CSELECT_B32 10, 20, implicit killed $scc
|
|
; GFX12-NEXT: $vgpr5 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
; GFX12-NEXT: GLOBAL_STORE_DWORD $vgpr6_vgpr7, $vgpr5, 0, 0, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
|
|
V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
|
|
S_CBRANCH_EXECZ %bb.4, implicit $exec
|
|
|
|
bb.1:
|
|
V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr1, implicit-def $exec, implicit $exec
|
|
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
|
|
|
bb.2:
|
|
S_BARRIER_SIGNAL_ISFIRST_IMM 0, implicit-def $scc, implicit killed $scc
|
|
S_BRANCH %bb.5
|
|
|
|
bb.3:
|
|
S_BARRIER_SIGNAL_ISFIRST_IMM 1, implicit-def $scc, implicit killed $scc
|
|
S_BRANCH %bb.5
|
|
|
|
bb.4:
|
|
S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit killed $scc
|
|
|
|
bb.5:
|
|
S_BARRIER_WAIT -1
|
|
renamable $sgpr1 = S_CSELECT_B32 10, 20, implicit killed $scc
|
|
$vgpr5 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr6_vgpr7, $vgpr5, 0, 0, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|