diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 48904582adba..fde67e9e2d83 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -509,7 +509,7 @@ class MUBUF_Load_Pseudo { - let LGKM_CNT = !not(IsAsync); + let LGKM_CNT = 0; let VM_CNT = !not(IsAsync); let ASYNC_CNT = IsAsync; let is_flat_global = 1; diff --git a/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll b/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll index 7e9f21b94bea..a526b1ec1650 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll +++ b/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll @@ -54,7 +54,6 @@ define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) inreg nocapt ; GFX90A-NEXT: s_mov_b32 m0, s4 ; GFX90A-NEXT: s_nop 0 ; GFX90A-NEXT: global_load_dword v0, s[2:3] offset:32 slc lds -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: diff --git a/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir b/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir index 21372c06d322..0e64d0430668 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir @@ -67,6 +67,45 @@ body: | ... +# Test that GLOBAL_LOAD_LDS does not increment lgkmcnt (LGKM_CNT = 0). 
+# GCN-LABEL: name: ds_read_global_load_lds_use_ds_data +# GCN: DS_READ_B32_gfx9 +# GCN-NEXT: GLOBAL_LOAD_LDS_DWORD +# GCN-NEXT: S_WAITCNT 49279 +# 49279 is 0xC07F, annotated here as lgkmcnt(0); NOTE(review) assuming the gfx9 S_WAITCNT layout, vmcnt and expcnt are left at their maximum (no wait), so the LDS-DMA load itself is not waited on before the DS_READ result is used - confirm +# GCN-NEXT: V_ADD_U32_e32 +--- +name: ds_read_global_load_lds_use_ds_data +body: | + bb.0: + $m0 = S_MOV_B32 0 + $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`) + GLOBAL_LOAD_LDS_DWORD $vgpr2_vgpr3, 4, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) + $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + S_ENDPGM 0 + +... + +# Test that BUFFER_LOAD_DWORD_LDS does not increment lgkmcnt. +# DS_READ increments lgkmcnt. When using the DS_READ result, we wait for lgkmcnt(0). +# GCN-LABEL: name: ds_read_buffer_load_lds_use_ds_data +# GCN: DS_READ_B32_gfx9 +# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN +# GCN-NEXT: S_WAITCNT 49279 +# 49279 is 0xC07F, annotated here as lgkmcnt(0); NOTE(review) assuming the gfx9 S_WAITCNT layout, vmcnt and expcnt are left at their maximum (no wait), so the LDS-DMA load itself is not waited on before the DS_READ result is used - confirm +# GCN-NEXT: V_ADD_U32_e32 +--- +name: ds_read_buffer_load_lds_use_ds_data +body: | + bb.0: + $m0 = S_MOV_B32 0 + $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`) + BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`) + $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + S_ENDPGM 0 + +... 
+ # GCN-LABEL: name: scratch_load_lds_dword_ds_read # GCN: SCRATCH_LOAD_LDS_DWORD # GCN-NEXT: S_WAITCNT 3952 diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index 1c986a02e8bd..fba7720b37bf 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -327,7 +327,7 @@ define void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture in ; GFX942-NEXT: s_mov_b32 m0, s2 ; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1 -; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %voffset.64 = zext i32 %voffset to i64