
The new instruction, S_WAITCNT_lds_direct, stands in for the not-yet-known wait count needed at a release operation to ensure that prior direct loads to LDS (formerly called LDS DMA) have completed. SIInsertWaitcnts replaces the instruction with a wait that carries a suitable vmcnt() value. Co-authored-by: Austin Kerbow <austin.kerbow@amd.com>.
134 lines
6.1 KiB
YAML
134 lines
6.1 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s
|
|
|
|
|
|
# Expected vmcnt(0) since the direct load is the only load.
|
|
---
|
|
name: dma_then_fence
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: dma_then_fence
|
|
; The input has no leading S_WAITCNT; the expected output starts with one that is not present in the input.
; GCN: S_WAITCNT 0
|
|
; GCN-NEXT: $m0 = S_MOV_B32 0
|
|
; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
|
|
; S_WAITCNT_lds_direct from the input is expected to come out as S_WAITCNT 3952, the vmcnt(0) wait this test is about.
; GCN-NEXT: S_WAITCNT 3952
|
|
; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
; Input: one buffer load whose memory operands describe a load from addrspace(1) and a store into addrspace(3), then the wait placeholder.
$m0 = S_MOV_B32 0
|
|
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
|
|
; Placeholder under test; it has no operands and must not survive into the output.
S_WAITCNT_lds_direct
|
|
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|
|
|
|
# Expected vmcnt(1): only the direct load to LDS has to complete, so the global load issued after it may remain outstanding.
|
|
|
|
---
|
|
name: dma_then_global_load
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: dma_then_global_load
|
|
; The input has no leading S_WAITCNT; the expected output starts with one that is not present in the input.
; GCN: S_WAITCNT 0
|
|
; GCN-NEXT: $m0 = S_MOV_B32 0
|
|
; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
|
|
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; With a global load between the direct load and the placeholder, the expected wait is S_WAITCNT 3953 (the vmcnt(1) case) instead of 3952.
; GCN-NEXT: S_WAITCNT 3953
|
|
; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
; Input: direct load to LDS, then an ordinary global load, then the wait placeholder.
$m0 = S_MOV_B32 0
|
|
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
|
|
; This load is issued after the direct load and carries no memory operand in this test.
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; Placeholder under test; it has no operands and must not survive into the output.
S_WAITCNT_lds_direct
|
|
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|
|
|
|
# Expected no vmcnt wait: there is no direct load to LDS, and the global load on its own does not need to be waited for here.
|
|
|
|
---
|
|
name: no_dma_just_fence
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: no_dma_just_fence
|
|
; The input has no leading S_WAITCNT; the expected output starts with one that is not present in the input.
; GCN: S_WAITCNT 0
|
|
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; No S_WAITCNT is expected between the global load and the add: the placeholder is dropped without a replacement.
; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
; Input: only an ordinary global load precedes the wait placeholder; there is no direct load to LDS.
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; Placeholder under test; it has no operands and must not survive into the output.
S_WAITCNT_lds_direct
|
|
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|
|
|
|
# Expected vmcnt(1): only the direct load to LDS has to complete, so the global load issued after it may remain outstanding.
|
|
|
|
---
|
|
name: dma_then_system_fence
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: dma_then_system_fence
|
|
; The input has no leading S_WAITCNT; the expected output starts with one that is not present in the input.
; GCN: S_WAITCNT 0
|
|
; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
|
|
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; Same expected wait as in dma_then_global_load: S_WAITCNT 3953 (the vmcnt(1) case).
; GCN-NEXT: S_WAITCNT 3953
|
|
; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
; Input: direct load to LDS, then an ordinary global load, then the wait placeholder.
; NOTE(review): unlike the other tests, $m0 is never defined here although the buffer load uses it implicitly,
; and nothing in this input encodes a system-scope fence; apart from the missing $m0 def it is the same as
; dma_then_global_load. Confirm whether this test was meant to differ.
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
|
|
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; Placeholder under test; it has no operands and must not survive into the output.
S_WAITCNT_lds_direct
|
|
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|
|
|
|
# The computed vmcnt(1) gets merged with the explicit vmcnt(0) wait (S_WAITCNT 3952) that precedes S_WAITCNT_lds_direct, leaving a single vmcnt(0) wait.
|
|
|
|
---
|
|
name: merge_with_prev_wait
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: merge_with_prev_wait
|
|
; The input has no leading S_WAITCNT; the expected output starts with one that is not present in the input.
; GCN: S_WAITCNT 0
|
|
; GCN-NEXT: $m0 = S_MOV_B32 0
|
|
; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
|
|
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; Exactly one wait is expected here: the explicit S_WAITCNT 3952 and the placeholder collapse into a single S_WAITCNT 3952.
; GCN-NEXT: S_WAITCNT 3952
|
|
; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
; Input: direct load to LDS, global load, an explicit S_WAITCNT 3952, and only then the wait placeholder.
$m0 = S_MOV_B32 0
|
|
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
|
|
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; Pre-existing explicit wait that comes before the placeholder.
S_WAITCNT 3952
|
|
; Placeholder under test; it has no operands and must not survive into the output.
S_WAITCNT_lds_direct
|
|
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|
|
|
|
# The computed vmcnt(1) gets merged with the explicit vmcnt(0) wait (S_WAITCNT 3952) that follows S_WAITCNT_lds_direct, leaving a single vmcnt(0) wait.
|
|
|
|
---
|
|
name: merge_with_next_wait
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: merge_with_next_wait
|
|
; The input has no leading S_WAITCNT; the expected output starts with one that is not present in the input.
; GCN: S_WAITCNT 0
|
|
; GCN-NEXT: $m0 = S_MOV_B32 0
|
|
; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
|
|
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; Exactly one wait is expected here: the placeholder and the explicit S_WAITCNT 3952 after it collapse into a single S_WAITCNT 3952.
; GCN-NEXT: S_WAITCNT 3952
|
|
; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
; Input: direct load to LDS, global load, the wait placeholder, and then an explicit S_WAITCNT 3952.
$m0 = S_MOV_B32 0
|
|
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
|
|
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
|
|
; Placeholder under test; it has no operands and must not survive into the output.
S_WAITCNT_lds_direct
|
|
; Pre-existing explicit wait that comes after the placeholder.
S_WAITCNT 3952
|
|
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|