On GFX9, BUFFER_WBL2 is used to write back dirty cache lines and requires an s_waitcnt vmcnt(0) afterwards to ensure completion. This patch fixes this by incrementing vmcnt for the buffer_wbl2 instruction. --------- Co-authored-by: Jay Foad <jay.foad@gmail.com>
58 lines
2.3 KiB
LLVM
58 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950 %s

; Test that the s_waitcnt vmcnt(0) between buffer_wbl2 and the following atomic
; is correctly preserved when there are global memory stores that need to be
; written back.

; Cross-block case: the global store is issued in %entry, while the agent-scope
; release atomic sits in the conditionally executed %do_atomic block. The checks
; expect an s_waitcnt vmcnt(0) between buffer_wbl2 and the atomic swap there.
define void @global_store_different_block(ptr addrspace(1) %data_ptr, ptr addrspace(1) %atomic_ptr, i1 %cond) {
; GFX950-LABEL: global_store_different_block:
; GFX950: ; %bb.0: ; %entry
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v4, 1, v4
; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
; GFX950-NEXT: v_mov_b32_e32 v4, 42
; GFX950-NEXT: global_store_dword v[0:1], v4, off
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-NEXT: s_cbranch_execz .LBB0_2
; GFX950-NEXT: ; %bb.1: ; %do_atomic
; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX950-NEXT: buffer_wbl2 sc1
; GFX950-NEXT: s_waitcnt vmcnt(0)
; GFX950-NEXT: global_atomic_swap_x2 v[2:3], v[0:1], off
; GFX950-NEXT: .LBB0_2: ; %exit
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX950-NEXT: s_waitcnt vmcnt(0)
; GFX950-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Global store in entry block
  store i32 42, ptr addrspace(1) %data_ptr, align 4
  ; Explicit wait before the branch: immediate 112 shows up in the checks above
  ; as `s_waitcnt vmcnt(0) lgkmcnt(0)` right after the store.
  ; NOTE(review): presumably this leaves buffer_wbl2 as the only outstanding
  ; vmcnt event when %do_atomic is reached - confirm.
  call void @llvm.amdgcn.s.waitcnt(i32 112)
  br i1 %cond, label %do_atomic, label %exit

do_atomic:
  ; Agent-scope release exchange; the checks show it lowered to buffer_wbl2 sc1,
  ; s_waitcnt vmcnt(0), then global_atomic_swap_x2. The result %old is unused.
  %old = atomicrmw xchg ptr addrspace(1) %atomic_ptr, i64 0 syncscope("agent") release
  br label %exit

exit:
  ret void
}
|
|
|
|
; Same-block case: the global store and the agent-scope release atomic are in
; one basic block with no explicit wait between them. The checks expect
; buffer_wbl2 to be followed by s_waitcnt vmcnt(0) before the atomic swap.
define void @global_store_then_atomic(ptr addrspace(1) %data_ptr, ptr addrspace(1) %atomic_ptr) {
; GFX950-LABEL: global_store_then_atomic:
; GFX950: ; %bb.0: ; %entry
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_mov_b32_e32 v4, 42
; GFX950-NEXT: global_store_dword v[0:1], v4, off
; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX950-NEXT: buffer_wbl2 sc1
; GFX950-NEXT: s_waitcnt vmcnt(0)
; GFX950-NEXT: global_atomic_swap_x2 v[2:3], v[0:1], off
; GFX950-NEXT: s_waitcnt vmcnt(0)
; GFX950-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Plain global store that precedes the release operation.
  store i32 42, ptr addrspace(1) %data_ptr, align 4
  ; Agent-scope release exchange; the checks show it lowered to buffer_wbl2 sc1,
  ; s_waitcnt vmcnt(0), then global_atomic_swap_x2. The result %old is unused.
  %old = atomicrmw xchg ptr addrspace(1) %atomic_ptr, i64 0 syncscope("agent") release
  ret void
}
|