`global_load_lds` and `buffer_load to lds` only increment `vmcnt` and do not touch `lgkmcnt`. Without this change, invalid `waitcnts` are emitted for some Triton kernels, similar to the added lit tests. Note that the change for buffer ops is not strictly necessary, i.e. the buffer lit test passes even before this PR, because it seems that `SIInsertWaitcnts` does not use `LGKM_CNT` for buffer ops. But this change might prevent a bug in the future.
159 lines
6.5 KiB
YAML
159 lines
6.5 KiB
YAML
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s
|
|
|
|
# Scenario: a buffer load that writes its result to LDS (its memory operands
# are a load from addrspace(1) and a store into addrspace(3)) is followed
# directly by a DS read from addrspace(3). The pass is expected to place a
# vmcnt(0) wait between the two instructions.
# GCN-LABEL: name: buffer_load_dword_lds_ds_read
# GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
# GCN-NEXT: S_WAITCNT 3952
# vmcnt(0)
# 3952 = 0xF70: vmcnt field is 0; the expcnt and lgkmcnt fields are all-ones,
# i.e. no wait is requested on those counters.
# GCN-NEXT: DS_READ_B32_gfx9
---
name: buffer_load_dword_lds_ds_read
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
    S_ENDPGM 0

...
|
|
|
|
# Scenario: a buffer load to LDS is followed by an ordinary buffer load into
# $vgpr10 and then by a DS read. Only the first (LDS-writing) load has to be
# complete before the DS read, so the expected wait is vmcnt(1): one VMEM
# operation may remain outstanding.
# GCN-LABEL: name: buffer_load_dword_lds_vmcnt_1
# GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
# GCN-NEXT: BUFFER_LOAD_DWORD_IDXEN
# GCN-NEXT: S_WAITCNT 3953
# vmcnt(1)
# 3953 = 0xF71: vmcnt field is 1; expcnt and lgkmcnt fields are all-ones
# (no wait on those counters).
# GCN-NEXT: DS_READ_B32_gfx9
---
name: buffer_load_dword_lds_vmcnt_1
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
    $vgpr10 = BUFFER_LOAD_DWORD_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`)
    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
    S_ENDPGM 0

...
|
|
|
|
# Scenario: a buffer load to LDS is followed by a FLAT load through a generic
# pointer (`ptr poison`, no address space given). The expected output waits
# for vmcnt(0) before the FLAT load.
# NOTE(review): presumably this is because a flat access may address LDS;
# confirm against the pass implementation.
# GCN-LABEL: name: buffer_load_dword_lds_flat_read
# GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
# GCN-NEXT: S_WAITCNT 3952
# vmcnt(0)
# 3952 = 0xF70: vmcnt field is 0; expcnt and lgkmcnt fields are all-ones
# (no wait on those counters).
# GCN-NEXT: FLAT_LOAD_DWORD
---
name: buffer_load_dword_lds_flat_read
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr poison`)

    S_ENDPGM 0

...
|
|
|
|
# Scenario: same shape as the buffer variant, but the LDS-writing load is a
# GLOBAL_LOAD_LDS_DWORD (load from addrspace(1), store into addrspace(3)).
# A vmcnt(0) wait is expected before the following DS read.
# GCN-LABEL: name: global_load_lds_dword_ds_read
# GCN: GLOBAL_LOAD_LDS_DWORD
# GCN-NEXT: S_WAITCNT 3952
# vmcnt(0)
# 3952 = 0xF70: vmcnt field is 0; expcnt and lgkmcnt fields are all-ones
# (no wait on those counters).
# GCN-NEXT: DS_READ_B32_gfx9
---
name: global_load_lds_dword_ds_read
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    GLOBAL_LOAD_LDS_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
    S_ENDPGM 0

...
|
|
|
|
# Test that GLOBAL_LOAD_LDS does not increment lgkmcnt (LGKM_CNT = 0).
# DS_READ increments lgkmcnt. The V_ADD consumes the DS_READ result ($vgpr0),
# so the only wait expected before it is lgkmcnt(0); the GLOBAL_LOAD_LDS
# issued in between must not change the lgkmcnt value that is waited for.
# GCN-LABEL: name: ds_read_global_load_lds_use_ds_data
# GCN: DS_READ_B32_gfx9
# GCN-NEXT: GLOBAL_LOAD_LDS_DWORD
# GCN-NEXT: S_WAITCNT 49279
# lgkmcnt(0)
# 49279 = 0xC07F: lgkmcnt field is 0; the vmcnt and expcnt fields are
# all-ones, i.e. no wait is requested on the outstanding LDS-writing load.
# GCN-NEXT: V_ADD_U32_e32
---
name: ds_read_global_load_lds_use_ds_data
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
    GLOBAL_LOAD_LDS_DWORD $vgpr2_vgpr3, 4, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
    $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_ENDPGM 0

...
|
|
|
|
# Test that BUFFER_LOAD_DWORD_LDS does not increment lgkmcnt.
# DS_READ increments lgkmcnt. When using the DS_READ result, we wait for lgkmcnt(0).
# The buffer load to LDS issued between the DS_READ and its use must not
# change the lgkmcnt value that is waited for.
# GCN-LABEL: name: ds_read_buffer_load_lds_use_ds_data
# GCN: DS_READ_B32_gfx9
# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
# GCN-NEXT: S_WAITCNT 49279
# lgkmcnt(0)
# 49279 = 0xC07F: lgkmcnt field is 0; the vmcnt and expcnt fields are
# all-ones, i.e. no wait is requested on the outstanding LDS-writing load.
# GCN-NEXT: V_ADD_U32_e32
---
name: ds_read_buffer_load_lds_use_ds_data
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
    $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_ENDPGM 0

...
|
|
|
|
# Scenario: the LDS-writing load is a SCRATCH_LOAD_LDS_DWORD (load from
# addrspace(5), store into addrspace(3)). As with the buffer and global
# variants, a vmcnt(0) wait is expected before the following DS read.
# GCN-LABEL: name: scratch_load_lds_dword_ds_read
# GCN: SCRATCH_LOAD_LDS_DWORD
# GCN-NEXT: S_WAITCNT 3952
# vmcnt(0)
# 3952 = 0xF70: vmcnt field is 0; expcnt and lgkmcnt fields are all-ones
# (no wait on those counters).
# GCN-NEXT: DS_READ_B32_gfx9
---
name: scratch_load_lds_dword_ds_read
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    SCRATCH_LOAD_LDS_DWORD $vgpr0, 4, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(5) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
    S_ENDPGM 0

...
|
|
|
|
# Scenario: the opposite direction. BUFFER_STORE_LDS_DWORD reads LDS (load
# from addrspace(3)) and stores to addrspace(1); it does not write LDS.
# The checks require the DS read to follow it immediately, i.e. no
# S_WAITCNT may be inserted between the two instructions.
# GCN-LABEL: name: buffer_store_lds_dword_ds_read
# GCN: BUFFER_STORE_LDS_DWORD
# GCN-NEXT: DS_READ_B32_gfx9
---
name: buffer_store_lds_dword_ds_read
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(3) poison` + 4), (store (s32) into `ptr addrspace(1) poison` + 4)
    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
    S_ENDPGM 0

...
|
|
|
|
# No need to wait before load from VMEM to LDS.
# Scenario: three back-to-back buffer loads to LDS (offsets 0, 4 and 8) are
# followed by a DS read. The checks require the three loads to stay adjacent
# (no wait between them) and a single vmcnt(0) wait before the DS read.
# GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read
# GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
# GCN-NEXT: S_WAITCNT 3952
# vmcnt(0)
# 3952 = 0xF70: vmcnt field is 0; expcnt and lgkmcnt fields are all-ones
# (no wait on those counters).
# GCN-NEXT: DS_READ_B32_gfx9
---
name: series_of_buffer_load_dword_lds_ds_read
body: |
  bb.0:
    $m0 = S_MOV_B32 0
    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
    S_ENDPGM 0

...
|