[AMDGPU] Insert waitcnt for non-global fence release in GFX12 (#159282)
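A release fence may be followed by a barrier, so it must wait for the relevant preceding memory accesses to complete, even when its MMRA metadata restricts it to LDS (the local address space). Previously, SIGfx12CacheControl::insertRelease returned early for release fences that did not include the global address space, so no wait was inserted for them. Fixes SWDEV-554932.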
This commit is contained in:
parent
b6c061e6a9
commit
3f8c7e9fa3
@ -2514,6 +2514,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
bool IsCrossAddrSpaceOrdering,
|
||||
Position Pos) const {
|
||||
bool Changed = false;
|
||||
|
||||
MachineBasicBlock &MBB = *MI->getParent();
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
|
||||
@ -2521,53 +2523,51 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
|
||||
// writeback as all memory operations by the same thread are
|
||||
// sequentially consistent, and no other thread can access scratch
|
||||
// memory.
|
||||
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
|
||||
if (Pos == Position::AFTER)
|
||||
++MI;
|
||||
|
||||
// Other address spaces do not have a cache.
|
||||
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
|
||||
return false;
|
||||
|
||||
if (Pos == Position::AFTER)
|
||||
++MI;
|
||||
|
||||
// global_wb is only necessary at system scope for GFX12.0,
|
||||
// they're also necessary at device scope for GFX12.5.
|
||||
//
|
||||
// Emitting it for lower scopes is a slow no-op, so we omit it
|
||||
// for performance.
|
||||
switch (Scope) {
|
||||
case SIAtomicScope::SYSTEM:
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
|
||||
.addImm(AMDGPU::CPol::SCOPE_SYS);
|
||||
break;
|
||||
case SIAtomicScope::AGENT:
|
||||
// TODO DOCS
|
||||
if (ST.hasGFX1250Insts()) {
|
||||
// global_wb is only necessary at system scope for GFX12.0,
|
||||
// they're also necessary at device scope for GFX12.5.
|
||||
//
|
||||
// Emitting it for lower scopes is a slow no-op, so we omit it
|
||||
// for performance.
|
||||
switch (Scope) {
|
||||
case SIAtomicScope::SYSTEM:
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
|
||||
.addImm(AMDGPU::CPol::SCOPE_DEV);
|
||||
.addImm(AMDGPU::CPol::SCOPE_SYS);
|
||||
Changed = true;
|
||||
break;
|
||||
case SIAtomicScope::AGENT:
|
||||
// TODO DOCS
|
||||
if (ST.hasGFX1250Insts()) {
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
|
||||
.addImm(AMDGPU::CPol::SCOPE_DEV);
|
||||
Changed = true;
|
||||
}
|
||||
break;
|
||||
case SIAtomicScope::CLUSTER:
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
// No WB necessary, but we still have to wait.
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
case SIAtomicScope::SINGLETHREAD:
|
||||
// No WB or wait necessary here, but insertWait takes care of that.
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unsupported synchronization scope");
|
||||
}
|
||||
break;
|
||||
case SIAtomicScope::CLUSTER:
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
// No WB necessary, but we still have to wait.
|
||||
break;
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
case SIAtomicScope::SINGLETHREAD:
|
||||
// No WB or wait necessary here.
|
||||
return false;
|
||||
default:
|
||||
llvm_unreachable("Unsupported synchronization scope");
|
||||
}
|
||||
|
||||
if (Pos == Position::AFTER)
|
||||
--MI;
|
||||
if (Pos == Position::AFTER)
|
||||
--MI;
|
||||
}
|
||||
|
||||
// We always have to wait for previous memory operations (load/store) to
|
||||
// complete, whether we inserted a WB or not. If we inserted a WB (storecnt),
|
||||
// we of course need to wait for that as well.
|
||||
insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
|
||||
IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
|
||||
Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
|
||||
IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
|
||||
|
||||
return true;
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
|
||||
|
||||
122
llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers-mmra.ll
Normal file
122
llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers-mmra.ll
Normal file
@ -0,0 +1,122 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
|
||||
define float @test_barrier_workgroup_local_mmra(ptr addrspace(3) noundef %x, ptr addrspace(3) noundef %y, float %val) {
|
||||
; GFX10-WGP-LABEL: test_barrier_workgroup_local_mmra:
|
||||
; GFX10-WGP: ; %bb.0:
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v2
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_barrier
|
||||
; GFX10-WGP-NEXT: ds_read_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-WGP-LABEL: test_barrier_workgroup_local_mmra:
|
||||
; GFX11-WGP: ; %bb.0:
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: ds_store_b32 v0, v2
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_barrier
|
||||
; GFX11-WGP-NEXT: ds_load_b32 v0, v1
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-WGP-LABEL: test_barrier_workgroup_local_mmra:
|
||||
; GFX12-WGP: ; %bb.0:
|
||||
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-WGP-NEXT: ds_store_b32 v0, v2
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_barrier_signal -1
|
||||
; GFX12-WGP-NEXT: s_barrier_wait -1
|
||||
; GFX12-WGP-NEXT: ds_load_b32 v0, v1
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: test_barrier_workgroup_local_mmra:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: ds_store_b32 v0, v2
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_barrier_signal -1
|
||||
; GFX1250-NEXT: s_barrier_wait -1
|
||||
; GFX1250-NEXT: ds_load_b32 v0, v1
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
store float %val, ptr addrspace(3) %x
|
||||
fence syncscope("workgroup") release, !mmra !0
|
||||
tail call void @llvm.amdgcn.s.barrier()
|
||||
fence syncscope("workgroup") acquire, !mmra !0
|
||||
%ret = load float, ptr addrspace(3) %y
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @test_barrier_workgroup_global_mmra(ptr addrspace(1) noundef %x, ptr addrspace(1) noundef %y, float %val) {
|
||||
; GFX10-WGP-LABEL: test_barrier_workgroup_global_mmra:
|
||||
; GFX10-WGP: ; %bb.0:
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: global_store_dword v[0:1], v4, off
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: s_barrier
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_load_dword v0, v[2:3], off
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-WGP-LABEL: test_barrier_workgroup_global_mmra:
|
||||
; GFX11-WGP: ; %bb.0:
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v[0:1], v4, off
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_barrier
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: global_load_b32 v0, v[2:3], off
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-WGP-LABEL: test_barrier_workgroup_global_mmra:
|
||||
; GFX12-WGP: ; %bb.0:
|
||||
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-WGP-NEXT: global_store_b32 v[0:1], v4, off
|
||||
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-WGP-NEXT: s_barrier_signal -1
|
||||
; GFX12-WGP-NEXT: s_barrier_wait -1
|
||||
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-WGP-NEXT: global_load_b32 v0, v[2:3], off
|
||||
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-WGP-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: test_barrier_workgroup_global_mmra:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v[0:1], v4, off
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_barrier_signal -1
|
||||
; GFX1250-NEXT: s_barrier_wait -1
|
||||
; GFX1250-NEXT: global_load_b32 v0, v[2:3], off
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
store float %val, ptr addrspace(1) %x
|
||||
fence syncscope("workgroup") release, !mmra !1
|
||||
tail call void @llvm.amdgcn.s.barrier()
|
||||
fence syncscope("workgroup") acquire, !mmra !1
|
||||
%ret = load float, ptr addrspace(1) %y
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
!0 = !{!"amdgpu-synchronize-as", !"local"}
|
||||
!1 = !{!"amdgpu-synchronize-as", !"global"}
|
||||
@ -143,14 +143,17 @@ define amdgpu_kernel void @workgroup_release_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: workgroup_release_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: workgroup_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
@ -213,14 +216,17 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
@ -283,14 +289,17 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
@ -670,14 +679,17 @@ define amdgpu_kernel void @agent_release_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: agent_release_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: agent_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
@ -740,14 +752,17 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: agent_acq_rel_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: agent_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
@ -810,14 +825,17 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: agent_seq_cst_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: agent_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
@ -1197,14 +1215,17 @@ define amdgpu_kernel void @system_release_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: system_release_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: system_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
@ -1267,14 +1288,17 @@ define amdgpu_kernel void @system_acq_rel_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: system_acq_rel_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: system_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
@ -1337,14 +1361,17 @@ define amdgpu_kernel void @system_seq_cst_fence() {
|
||||
;
|
||||
; GFX12-WGP-LABEL: system_seq_cst_fence:
|
||||
; GFX12-WGP: ; %bb.0: ; %entry
|
||||
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-CU-LABEL: system_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user