
[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test

gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores, while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986).

For SWDEV-512631
17227 lines
716 KiB
LLVM
17227 lines
716 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck --check-prefixes=GFX90A-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck --check-prefixes=GFX90A-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck --check-prefixes=GFX942-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s

; Test matrix (all compiled with llc at -O0, one FileCheck prefix per line above)
;   * gfx600 and gfx700 on amdhsa with default options
;   * gfx700 on amdpal with -amdgcn-skip-cache-invalidations
;   * gfx1010, gfx1100 and gfx1200 both with default options and with +cumode
;   * gfx90a and gfx942 both with default options and with +tgsplit
; Every FileCheck invocation uses the double-dash --check-prefixes spelling so
; the lines stay uniform.
|
|
|
|
; Unordered atomic i32 load with syncscope("agent") from an addrspace(3)
; pointer; the loaded value is then written with a plain store to a second
; addrspace(3) pointer.
; The assertions below expect a bare DS read followed by a DS write on every
; configuration. The only waits are the one after the kernel-argument loads and
; the one that guards the loaded value before it is written. No cache
; invalidate instruction is expected in any configuration.
define amdgpu_kernel void @local_agent_unordered_load(
; GFX6-LABEL: local_agent_unordered_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr4
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr5
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: ds_read_b32 v1, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_unordered_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: ds_read_b32 v1, v0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_unordered_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: ds_read_b32 v1, v0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_unordered_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: ds_read_b32 v1, v0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_unordered_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: ds_read_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_unordered_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_unordered_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_unordered_load:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_unordered_load:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_unordered_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: ds_load_b32 v1, v0
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_unordered_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: ds_load_b32 v1, v0
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_unordered_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: ds_load_b32 v1, v0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_unordered_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: ds_load_b32 v1, v0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
  %val = load atomic i32, ptr addrspace(3) %in syncscope("agent") unordered, align 4
  store i32 %val, ptr addrspace(3) %out
  ret void
}
|
|
|
|
; Monotonic atomic i32 load with syncscope("agent") from an addrspace(3)
; pointer; the loaded value is then written with a plain store to a second
; addrspace(3) pointer.
; The assertions below expect a bare DS read followed by a DS write on every
; configuration. The only waits are the one after the kernel-argument loads and
; the one that guards the loaded value before it is written. No cache
; invalidate instruction is expected in any configuration.
define amdgpu_kernel void @local_agent_monotonic_load(
; GFX6-LABEL: local_agent_monotonic_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr4
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr5
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: ds_read_b32 v1, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_monotonic_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: ds_read_b32 v1, v0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_monotonic_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: ds_read_b32 v1, v0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_monotonic_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: ds_read_b32 v1, v0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: ds_read_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_load:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_load:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_monotonic_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: ds_load_b32 v1, v0
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_monotonic_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: ds_load_b32 v1, v0
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_monotonic_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: ds_load_b32 v1, v0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_monotonic_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: ds_load_b32 v1, v0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
  %val = load atomic i32, ptr addrspace(3) %in syncscope("agent") monotonic, align 4
  store i32 %val, ptr addrspace(3) %out
  ret void
}
|
|
|
|
; Acquire atomic i32 load with syncscope("agent") from an addrspace(3) pointer;
; the loaded value is then written with a plain store to a second addrspace(3)
; pointer.
; What the assertions below expect after the DS read
;   * gfx600, gfx700 (including the skip-cache-invalidations run), gfx1010 and
;     gfx1100 with +cumode, gfx1200 with +cumode, and gfx90a / gfx942 without
;     +tgsplit wait for the read to complete and emit no cache invalidate.
;   * gfx1010 and gfx1100 in the default (WGP) runs wait and then emit
;     buffer_gl0_inv.
;   * gfx1200 in the default (WGP) run waits on dscnt and then emits
;     global_inv scope:SCOPE_SE.
;   * gfx90a with +tgsplit emits buffer_wbinvl1_vol and gfx942 with +tgsplit
;     emits buffer_inv sc0 directly after the read; in these two runs the
;     lgkmcnt wait is only placed before the dependent DS write.
define amdgpu_kernel void @local_agent_acquire_load(
; GFX6-LABEL: local_agent_acquire_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr4
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr5
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: ds_read_b32 v1, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_acquire_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: ds_read_b32 v1, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_acquire_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: ds_read_b32 v1, v0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_acquire_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: ds_read_b32 v1, v0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_acquire_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: ds_read_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_load:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_acquire_load:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_acquire_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: ds_load_b32 v1, v0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_acquire_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: ds_load_b32 v1, v0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_acquire_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: ds_load_b32 v1, v0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_acquire_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: ds_load_b32 v1, v0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
  %val = load atomic i32, ptr addrspace(3) %in syncscope("agent") acquire, align 4
  store i32 %val, ptr addrspace(3) %out
  ret void
}
|
|
|
|
; Sequentially consistent atomic i32 load with syncscope("agent") from an
; addrspace(3) pointer; the loaded value is then written with a plain store to
; a second addrspace(3) pointer.
; What the assertions below expect
;   * Every configuration has an additional wait in front of the DS read and a
;     wait before the loaded value is written back.
;   * In front of the read, gfx1010 and gfx1100 in the default (WGP) runs use
;     s_waitcnt vmcnt(0) lgkmcnt(0) followed by s_waitcnt_vscnt null, 0x0;
;     gfx90a and gfx942 with +tgsplit use s_waitcnt vmcnt(0) lgkmcnt(0);
;     gfx1200 in the default (WGP) run waits on bvhcnt, samplecnt, storecnt
;     and loadcnt_dscnt; the remaining runs wait on lgkmcnt (dscnt on gfx1200
;     with +cumode) only.
;   * After the read, a cache invalidate is expected only in the default (WGP)
;     runs of gfx1010 / gfx1100 (buffer_gl0_inv) and gfx1200
;     (global_inv scope:SCOPE_SE), and in the +tgsplit runs of gfx90a
;     (buffer_wbinvl1_vol) and gfx942 (buffer_inv sc0).
define amdgpu_kernel void @local_agent_seq_cst_load(
; GFX6-LABEL: local_agent_seq_cst_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr4
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr5
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_read_b32 v1, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_seq_cst_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_read_b32 v1, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_seq_cst_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_read_b32 v1, v0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_seq_cst_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_read_b32 v1, v0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_read_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_load:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_load:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_seq_cst_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_load_b32 v1, v0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_seq_cst_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_load_b32 v1, v0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_seq_cst_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_load_b32 v1, v0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_seq_cst_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_load_b32 v1, v0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
  %val = load atomic i32, ptr addrspace(3) %in syncscope("agent") seq_cst, align 4
  store i32 %val, ptr addrspace(3) %out
  ret void
}
|
|
|
|
; Atomic i32 store to addrspace(3) with syncscope("agent") and 'unordered'
; ordering. For every run line the expected code is just the two scalar
; loads, a single wait, the register copies and one DS store (plus the m0
; setup on the gfx600/gfx700 runs); no additional wait precedes the store.
define amdgpu_kernel void @local_agent_unordered_store(
; GFX6-LABEL: local_agent_unordered_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_unordered_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_unordered_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_unordered_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_unordered_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_unordered_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_unordered_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_unordered_store:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_unordered_store:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_unordered_store:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_unordered_store:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_unordered_store:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_unordered_store:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    i32 %in, ptr addrspace(3) %out) {
entry:
  store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") unordered, align 4
  ret void
}
|
|
|
|
; Atomic i32 store to addrspace(3) with syncscope("agent") and 'monotonic'
; ordering. For every run line the expected code is just the two scalar
; loads, a single wait, the register copies and one DS store (plus the m0
; setup on the gfx600/gfx700 runs); no additional wait precedes the store.
define amdgpu_kernel void @local_agent_monotonic_store(
; GFX6-LABEL: local_agent_monotonic_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_monotonic_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_monotonic_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_monotonic_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_store:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_store:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_monotonic_store:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_monotonic_store:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_monotonic_store:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_monotonic_store:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    i32 %in, ptr addrspace(3) %out) {
entry:
  store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 4
  ret void
}
|
|
|
|
; Atomic i32 store to addrspace(3) with syncscope("agent") and 'release'
; ordering. Unlike the unordered/monotonic variants, every run line expects
; an extra wait immediately before the DS store. Its form differs per run:
; lgkmcnt only, vmcnt+lgkmcnt (tgsplit runs), vmcnt+lgkmcnt followed by a
; vscnt wait (gfx1010/gfx1100 without cumode), or the split s_wait_*
; counters on the gfx12 runs.
define amdgpu_kernel void @local_agent_release_store(
; GFX6-LABEL: local_agent_release_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_release_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_release_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_release_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_release_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_release_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_release_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_release_store:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_release_store:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_release_store:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_release_store:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_release_store:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_release_store:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    i32 %in, ptr addrspace(3) %out) {
entry:
  store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") release, align 4
  ret void
}
|
|
|
|
; Atomic i32 store to addrspace(3) with syncscope("agent") and 'seq_cst'
; ordering. The expected instruction sequences match the release-store case
; in this file: each run line has one extra wait (or group of waits)
; immediately before the DS store, in a form that depends on the target and
; on the cumode/tgsplit attributes.
define amdgpu_kernel void @local_agent_seq_cst_store(
; GFX6-LABEL: local_agent_seq_cst_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_seq_cst_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_seq_cst_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_seq_cst_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_store:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_store:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_seq_cst_store:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_seq_cst_store:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_seq_cst_store:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_seq_cst_store:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    i32 %in, ptr addrspace(3) %out) {
entry:
  store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 4
  ret void
}
|
|
|
|
; Volatile atomicrmw xchg on addrspace(3) with syncscope("agent") and
; 'monotonic' ordering; the returned value is unused. Every run line expects
; a single returning DS exchange (ds_wrxchg_rtn_b32, spelled
; ds_storexchg_rtn_b32 on the gfx11/gfx12 runs) with no wait before or
; after it other than the one following the scalar loads.
define amdgpu_kernel void @local_agent_monotonic_atomicrmw(
; GFX6-LABEL: local_agent_monotonic_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_monotonic_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_monotonic_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_monotonic_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_atomicrmw:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_atomicrmw:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_monotonic_atomicrmw:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_monotonic_atomicrmw:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_monotonic_atomicrmw:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_monotonic_atomicrmw:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") monotonic
  ret void
}
|
|
|
|
define amdgpu_kernel void @local_agent_acquire_atomicrmw(
|
|
; GFX6-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acquire_atomicrmw:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acquire_atomicrmw:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in) {
|
|
entry:
|
|
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acquire
|
|
ret void
|
|
}
|
|
|
|
; Kernel exercising a volatile `atomicrmw xchg` with release ordering at
; syncscope("agent") on an addrspace(3) pointer. The exchanged-out value is
; not used. The prefixed comment groups that follow are the generated
; expectations (one group per RUN line) and are kept exactly as produced by
; the update script. In every group the wait instruction(s) come before the
; DS exchange and only s_endpgm follows it.
define amdgpu_kernel void @local_agent_release_atomicrmw(
|
|
; GFX6-LABEL: local_agent_release_atomicrmw:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_release_atomicrmw:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_release_atomicrmw:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_release_atomicrmw:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_release_atomicrmw:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_release_atomicrmw:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_release_atomicrmw:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_release_atomicrmw:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_release_atomicrmw:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_release_atomicrmw:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_release_atomicrmw:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_release_atomicrmw:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_release_atomicrmw:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in) {
|
|
entry:
|
|
; %val has no users in this kernel; only the release-ordered exchange is checked.
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") release
|
|
ret void
|
|
}
|
|
|
|
; Kernel exercising a volatile `atomicrmw xchg` with acq_rel ordering at
; syncscope("agent") on an addrspace(3) pointer. The exchanged-out value is
; not used. The prefixed comment groups that follow are the generated
; expectations (one group per RUN line) and are kept exactly as produced by
; the update script. Every group has wait instruction(s) before the DS
; exchange, and at least one further instruction (a wait, or an instruction
; such as buffer_gl0_inv / buffer_wbinvl1_vol / buffer_inv / global_inv)
; between the exchange and s_endpgm.
define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
|
|
; GFX6-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acq_rel_atomicrmw:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in) {
|
|
entry:
|
|
; %val has no users in this kernel; only the acq_rel-ordered exchange is checked.
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acq_rel
|
|
ret void
|
|
}
|
|
|
|
; Kernel exercising a volatile `atomicrmw xchg` with seq_cst ordering at
; syncscope("agent") on an addrspace(3) pointer. The exchanged-out value is
; not used. The prefixed comment groups that follow are the generated
; expectations (one group per RUN line) and are kept exactly as produced by
; the update script. Every group has wait instruction(s) before the DS
; exchange, and at least one further instruction (a wait, or an instruction
; such as buffer_gl0_inv / buffer_wbinvl1_vol / buffer_inv / global_inv)
; between the exchange and s_endpgm.
define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
|
|
; GFX6-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_seq_cst_atomicrmw:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in) {
|
|
entry:
|
|
; %val has no users in this kernel; only the seq_cst-ordered exchange is checked.
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") seq_cst
|
|
ret void
|
|
}
|
|
|
|
; Kernel exercising a volatile `atomicrmw xchg` with acquire ordering at
; syncscope("agent") on an addrspace(3) pointer, where the value returned by
; the exchange is used by storing it back through %out. The prefixed comment
; groups that follow are the generated expectations (one group per RUN line)
; and are kept exactly as produced by the update script. Each group shows the
; DS exchange writing v1, followed later by a plain DS store
; (ds_write_b32 / ds_store_b32) of v1.
define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
|
|
; GFX6-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acquire_ret_atomicrmw:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in) {
|
|
entry:
|
|
; Acquire-ordered exchange whose old value (%val) is consumed below.
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acquire
|
|
; Non-atomic store of the exchanged-out value back through the same pointer.
store i32 %val, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: volatile atomicrmw xchg on a local (addrspace(3)) pointer with
; syncscope("agent") and acq_rel ordering. The value returned by the exchange
; is stored back through %out, so the result of the atomic is used.
; The assertions inside the signature are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
; GFX6-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_ret_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_acq_rel_ret_atomicrmw:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acq_rel
  ; Write the value returned by the exchange back through %out.
  store i32 %val, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Test kernel: volatile atomicrmw xchg on a local (addrspace(3)) pointer with
; syncscope("agent") and seq_cst ordering. The value returned by the exchange
; is stored back through %out, so the result of the atomic is used.
; The assertions inside the signature are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
; GFX6-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_ret_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_seq_cst_ret_atomicrmw:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") seq_cst
  ; Write the value returned by the exchange back through %out.
  store i32 %val, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Test kernel: volatile cmpxchg on a local (addrspace(3)) pointer with
; syncscope("agent") and monotonic success / monotonic failure orderings.
; The result of the cmpxchg is not used by the kernel.
; The assertions inside the signature are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg(
; GFX6-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_monotonic_monotonic_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element 4 of an i32 array is a 16-byte offset from %out, which is the
  ; immediate offset the expected DS compare-and-store instructions carry.
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
  ret void
}
|
|
|
|
define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acquire_monotonic_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
|
|
ret void
|
|
}
|
|
|
|
; Test case: volatile cmpxchg through a local-memory (addrspace(3)) pointer at
; syncscope("agent"), with success ordering "release" and failure ordering
; "monotonic". The operation targets i32 element 4 past %out, which the
; expected DS compare-store encodes as offset:16; the result (%val) is unused.
; In every expected sequence below, a wait instruction directly precedes the
; DS compare-store and s_endpgm directly follows it.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg(
|
|
; GFX6-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_release_monotonic_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release monotonic
|
|
ret void
|
|
}
|
|
|
|
; Test case: volatile cmpxchg through a local-memory (addrspace(3)) pointer at
; syncscope("agent"), with success ordering "acq_rel" and failure ordering
; "monotonic". The operation targets i32 element 4 past %out, which the
; expected DS compare-store encodes as offset:16; the result (%val) is unused.
; In every expected sequence below, a wait instruction directly precedes the
; DS compare-store. After it, the prefixes ending in -TGSPLIT expect only a
; cache invalidate (buffer_wbinvl1_vol or buffer_inv sc0); all other prefixes
; expect a wait, which the prefixes ending in -WGP follow with an invalidate
; (buffer_gl0_inv or global_inv).
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
|
|
ret void
|
|
}
|
|
|
|
; Test case: volatile cmpxchg through a local-memory (addrspace(3)) pointer at
; syncscope("agent"), with success ordering "seq_cst" and failure ordering
; "monotonic". The operation targets i32 element 4 past %out, which the
; expected DS compare-store encodes as offset:16; the result (%val) is unused.
; In every expected sequence below, a wait instruction directly precedes the
; DS compare-store. After it, the prefixes ending in -TGSPLIT expect only a
; cache invalidate (buffer_wbinvl1_vol or buffer_inv sc0); all other prefixes
; expect a wait, which the prefixes ending in -WGP follow with an invalidate
; (buffer_gl0_inv or global_inv).
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
|
|
; GFX6-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @local_agent_monotonic_acquire_cmpxchg(
|
|
; GFX6-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_monotonic_acquire_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire
|
|
ret void
|
|
}
|
|
|
|
; Volatile cmpxchg on a local-memory pointer (addrspace(3)) at
; syncscope("agent") with success ordering `acquire` and failure ordering
; `acquire`. The access targets %out + 4 i32 elements (byte offset 16, the
; offset:16 operand in the expected DS compare-and-store instruction), compares
; against %old, stores %in, and leaves the result %val unused.
; The per-target assertion blocks inside the definition are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg(
; GFX6-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_acquire_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_acquire_acquire_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
  ret void
}
|
|
|
|
; Volatile cmpxchg on a local-memory pointer (addrspace(3)) at
; syncscope("agent") with success ordering `release` and failure ordering
; `acquire`. The access targets %out + 4 i32 elements (byte offset 16, the
; offset:16 operand in the expected DS compare-and-store instruction), compares
; against %old, stores %in, and leaves the result %val unused.
; The per-target assertion blocks inside the definition are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
; GFX6-LABEL: local_agent_release_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_release_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_release_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_release_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_release_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_release_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_release_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_release_acquire_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_release_acquire_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_release_acquire_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_release_acquire_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_release_acquire_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_release_acquire_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release acquire
  ret void
}
|
|
|
|
; Volatile cmpxchg on a local-memory pointer (addrspace(3)) at
; syncscope("agent") with success ordering `acq_rel` and failure ordering
; `acquire`. The access targets %out + 4 i32 elements (byte offset 16, the
; offset:16 operand in the expected DS compare-and-store instruction), compares
; against %old, stores %in, and leaves the result %val unused.
; The per-target assertion blocks inside the definition are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
; GFX6-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_acq_rel_acquire_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
  ret void
}
|
|
|
|
define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
|
|
; GFX6-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_seq_cst_acquire_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile 32-bit cmpxchg through an addrspace(3) pointer
; with syncscope("agent"), success ordering monotonic and failure ordering
; seq_cst. The cmpxchg result (%val) is never used and the kernel returns void.
; The expected-assembly comment lines inside this definition are generated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg(
|
|
; GFX6-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Step four i32 elements (16 bytes) past %out; this is the offset:16 operand
; on the ds compare-store instruction in the expected output above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; cmpxchg operands are (pointer, expected, new): %old is the value compared
; against memory and %in is the value stored on a match.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile 32-bit cmpxchg through an addrspace(3) pointer
; with syncscope("agent"), success ordering acquire and failure ordering
; seq_cst. The cmpxchg result (%val) is never used and the kernel returns void.
; The expected-assembly comment lines inside this definition are generated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acquire_seq_cst_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Step four i32 elements (16 bytes) past %out; this is the offset:16 operand
; on the ds compare-store instruction in the expected output above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; cmpxchg operands are (pointer, expected, new): %old is the value compared
; against memory and %in is the value stored on a match.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile 32-bit cmpxchg through an addrspace(3) pointer
; with syncscope("agent"), success ordering release and failure ordering
; seq_cst. The cmpxchg result (%val) is never used and the kernel returns void.
; The expected-assembly comment lines inside this definition are generated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg(
|
|
; GFX6-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_release_seq_cst_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Step four i32 elements (16 bytes) past %out; this is the offset:16 operand
; on the ds compare-store instruction in the expected output above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; cmpxchg operands are (pointer, expected, new): %old is the value compared
; against memory and %in is the value stored on a match.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile cmpxchg through an addrspace(3) pointer at
; syncscope("agent"), with success ordering acq_rel and failure ordering
; seq_cst. The cmpxchg result has no users.
; The per-target assertion lines between the signature and the body are
; autogenerated (see the note at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Element index 4 of an i32 array, i.e. 16 bytes past %out; this is the
; offset:16 operand on the expected DS compare-and-store instructions.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Expected value is %old, replacement value is %in; %val is never read.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile cmpxchg through an addrspace(3) pointer at
; syncscope("agent"), with seq_cst as both the success and the failure
; ordering. The cmpxchg result has no users.
; The per-target assertion lines between the signature and the body are
; autogenerated (see the note at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
|
|
; GFX6-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Element index 4 of an i32 array, i.e. 16 bytes past %out; this is the
; offset:16 operand on the expected DS compare-and-store instructions.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Expected value is %old, replacement value is %in; %val is never read.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile cmpxchg through an addrspace(3) pointer at
; syncscope("agent"), with monotonic as both the success and the failure
; ordering. Unlike the variants without "_ret" in the name, the cmpxchg result
; is used: its first field is stored back through %out.
; The per-target assertion lines between the signature and the body are
; autogenerated (see the note at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_monotonic_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Element index 4 of an i32 array, i.e. 16 bytes past %out; this is the
; offset:16 operand on the expected DS compare-and-store instructions.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Expected value is %old, replacement value is %in.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
|
|
; Field 0 of the { i32, i1 } result is the i32 half of the pair (the value the
; cmpxchg read from memory, per the LLVM language reference); the i1 is unused.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; Plain (non-atomic) store of that value to %out itself, not to %gep.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
|
|
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test: compare-and-swap on local memory (addrspace(3)) at "agent" sync scope,
; `release` ordering on success and `monotonic` on failure, with the value
; returned by the cmpxchg consumed (the "_ret_" in the test name).
;
; Kernel arguments:
;   %out - addrspace(3) base pointer; the cmpxchg targets i32 element 4 of it
;          and the cmpxchg result is stored back through %out itself.
;   %in  - replacement value for the cmpxchg.
;   %old - expected value for the cmpxchg.
;
; Shape of the expected code in every configuration below: a wait-count
; instruction sits immediately before the DS compare-store, and no cache
; invalidate is expected after it.
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_release_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 is a 16-byte displacement; it appears as offset:16 on
; the DS compare-store in the expected output.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release monotonic
|
|
; Only field 0 of the { i32, i1 } result is used; the i1 field is never read.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test: compare-and-swap on local memory (addrspace(3)) at "agent" sync scope,
; `acq_rel` ordering on success and `monotonic` on failure, with the value
; returned by the cmpxchg consumed (the "_ret_" in the test name).
;
; Kernel arguments:
;   %out - addrspace(3) base pointer; the cmpxchg targets i32 element 4 of it
;          and the cmpxchg result is stored back through %out itself.
;   %in  - replacement value for the cmpxchg.
;   %old - expected value for the cmpxchg.
;
; Shape of the expected code in every configuration below: a wait-count
; instruction sits immediately before the DS compare-store and another wait
; precedes the final DS store. The WGP-mode GFX10/11/12 runs and the tgsplit
; GFX90A/GFX942 runs additionally expect a cache invalidate after the
; compare-store; the remaining runs expect none.
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 is a 16-byte displacement; it appears as offset:16 on
; the DS compare-store in the expected output.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
|
|
; Only field 0 of the { i32, i1 } result is used; the i1 field is never read.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
|
|
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Returning cmpxchg on a local (addrspace(3)) pointer at syncscope("agent") with
; success ordering monotonic and failure ordering acquire; the value read by
; the cmpxchg is stored back to %out.
; The per-target assertions below expect no wait directly ahead of the DS
; compare-store, a wait on lgkmcnt/dscnt before the result is written back, and
; an invalidate instruction (buffer_gl0_inv, buffer_wbinvl1_vol, buffer_inv sc0
; or global_inv) only under the *-WGP and *-TGSPLIT prefixes.
define amdgpu_kernel void @local_agent_monotonic_acquire_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element 4 past %out; this is the 16-byte offset seen on the DS
; compare-store in the assertions above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire
|
|
; Field 0 of the cmpxchg result pair is the value that was read from memory.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Returning cmpxchg on a local (addrspace(3)) pointer at syncscope("agent") with
; acquire ordering for both the success and the failure case; the value read by
; the cmpxchg is stored back to %out.
; The per-target assertions below expect no wait directly ahead of the DS
; compare-store, a wait on lgkmcnt/dscnt before the result is written back, and
; an invalidate instruction (buffer_gl0_inv, buffer_wbinvl1_vol, buffer_inv sc0
; or global_inv) only under the *-WGP and *-TGSPLIT prefixes.
define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element 4 past %out; this is the 16-byte offset seen on the DS
; compare-store in the assertions above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
|
|
; Field 0 of the cmpxchg result pair is the value that was read from memory.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_release_acquire_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release acquire
|
|
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: an agent-scope `cmpxchg volatile` on a local
; (addrspace(3)) pointer with acq_rel success ordering and acquire failure
; ordering. The value returned by the cmpxchg is stored back through %out, and
; the assertions expect the returning ds_cmpst_rtn_b32 / ds_cmpstore_rtn_b32
; form of the instruction.
;
; The per-target assertion lines embedded in the signature are autogenerated
; by utils/update_llc_test_checks.py (see the NOTE at the top of this file).
; Regenerate them with that script instead of editing them by hand.
define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
; GFX6-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s6
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element 4 of an i32 array, i.e. byte offset 16 (the `offset:16` seen in the
  ; expected DS instructions).
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; The atomic operation under test.
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
  ; Field 0 of the cmpxchg result is the i32 value loaded from memory.
  %val0 = extractvalue { i32, i1 } %val, 0
  ; Store the loaded value so the cmpxchg result is used.
  store i32 %val0, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Kernel under test: an agent-scope `cmpxchg volatile` on a local
; (addrspace(3)) pointer with seq_cst success ordering and acquire failure
; ordering. The value returned by the cmpxchg is stored back through %out, and
; the assertions expect the returning ds_cmpst_rtn_b32 / ds_cmpstore_rtn_b32
; form of the instruction.
;
; The per-target assertion lines embedded in the signature are autogenerated
; by utils/update_llc_test_checks.py (see the NOTE at the top of this file).
; Regenerate them with that script instead of editing them by hand.
define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
; GFX6-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s6
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element 4 of an i32 array, i.e. byte offset 16 (the `offset:16` seen in the
  ; expected DS instructions).
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; The atomic operation under test.
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
  ; Field 0 of the cmpxchg result is the i32 value loaded from memory.
  %val0 = extractvalue { i32, i1 } %val, 0
  ; Store the loaded value so the cmpxchg result is used.
  store i32 %val0, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst
|
|
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile cmpxchg on an addrspace(3) pointer with
; syncscope("agent"), success ordering acquire and failure ordering seq_cst.
; The previous memory value returned by the cmpxchg is written back through
; %out, so the result of the atomic is live. The comment lines between the two
; halves of the signature are the autogenerated expected-output assertions, one
; group per RUN configuration; regenerate them with update_llc_test_checks.py
; rather than editing them by hand.
define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Address the i32 element at index 4 of %out, i.e. a 16-byte displacement.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Compare against %old and, on a match, write %in; acquire on success, seq_cst on failure.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst
|
|
; Element 0 of the cmpxchg result pair is the i32 value that was loaded.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; The loaded value is stored to the base pointer %out, not to %gep.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile cmpxchg on an addrspace(3) pointer with
; syncscope("agent"), success ordering release and failure ordering seq_cst.
; The previous memory value returned by the cmpxchg is written back through
; %out, so the result of the atomic is live. The comment lines between the two
; halves of the signature are the autogenerated expected-output assertions, one
; group per RUN configuration; regenerate them with update_llc_test_checks.py
; rather than editing them by hand.
define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Address the i32 element at index 4 of %out, i.e. a 16-byte displacement.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Compare against %old and, on a match, write %in; release on success, seq_cst on failure.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst
|
|
; Element 0 of the cmpxchg result pair is the i32 value that was loaded.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; The loaded value is stored to the base pointer %out, not to %gep.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile cmpxchg on an addrspace(3) pointer with
; syncscope("agent"), success ordering acq_rel and failure ordering seq_cst.
; The previous memory value returned by the cmpxchg is written back through
; %out, so the result of the atomic is live. The comment lines between the two
; halves of the signature are the autogenerated expected-output assertions, one
; group per RUN configuration; regenerate them with update_llc_test_checks.py
; rather than editing them by hand.
define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Address the i32 element at index 4 of %out, i.e. a 16-byte displacement.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Compare against %old and, on a match, write %in; acq_rel on success, seq_cst on failure.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst
|
|
; Element 0 of the cmpxchg result pair is the i32 value that was loaded.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; The loaded value is stored to the base pointer %out, not to %gep.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile cmpxchg on addrspace(3) memory with
; syncscope("agent") and seq_cst ordering for both the success and the
; failure case. The value loaded by the cmpxchg is then stored to %out.
;
; The ';'-prefixed assertion blocks that sit between the two signature lines
; are autogenerated (see the NOTE at the top of this file); regenerate them
; with the update script rather than editing them by hand. In those blocks,
; the WGP-mode and tgsplit runs expect an extra buffer_gl0_inv,
; buffer_wbinvl1_vol, buffer_inv or global_inv instruction after the
; compare-and-store, while the remaining runs expect only waits around it.
define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: buffer_gl0_inv
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: buffer_gl0_inv
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Index 4 of an i32 array is byte offset 16, which is the offset:16 operand
; on the expected compare-and-store instructions above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Compare against %old and, on a match, write %in.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
|
; Field 0 of the { i32, i1 } result is the value that was loaded; the i1
; success flag is not used by this test.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: an atomic i32 load from addrspace(3) memory with
; syncscope("agent-one-as") and unordered ordering. The loaded value is then
; written to %out with an ordinary (non-atomic) store.
;
; The ';'-prefixed assertion blocks that sit between the two signature lines
; are autogenerated (see the NOTE at the top of this file); regenerate them
; with the update script rather than editing them by hand. Every expected
; block here is a DS load, a wait, and a DS store; none of them contains a
; buffer_*inv* or global_inv instruction.
define amdgpu_kernel void @local_agent_one_as_unordered_load(
|
|
; GFX6-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr4
|
|
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr5
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX6-NEXT: ds_read_b32 v1, v0
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX7-NEXT: ds_read_b32 v1, v0
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-WGP-NEXT: ds_read_b32 v1, v0
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-CU-NEXT: ds_read_b32 v1, v0
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_unordered_load:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_read_b32 v1, v0
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-WGP-NEXT: ds_load_b32 v1, v0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-CU-NEXT: ds_load_b32 v1, v0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-WGP-NEXT: ds_load_b32 v1, v0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_unordered_load:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-CU-NEXT: ds_load_b32 v1, v0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
entry:
|
|
; The operation under test: atomic load with the weakest atomic ordering.
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") unordered, align 4
|
|
; Plain store of the loaded value; it gives the load a visible use.
store i32 %val, ptr addrspace(3) %out
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: an atomic i32 load from addrspace(3) memory with
; syncscope("agent-one-as") and monotonic ordering. The loaded value is then
; written to %out with an ordinary (non-atomic) store.
;
; The ';'-prefixed assertion blocks that sit between the two signature lines
; are autogenerated (see the NOTE at the top of this file); regenerate them
; with the update script rather than editing them by hand. Every expected
; block here is a DS load, a wait, and a DS store; none of them contains a
; buffer_*inv* or global_inv instruction.
define amdgpu_kernel void @local_agent_one_as_monotonic_load(
|
|
; GFX6-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr4
|
|
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr5
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX6-NEXT: ds_read_b32 v1, v0
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX7-NEXT: ds_read_b32 v1, v0
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-WGP-NEXT: ds_read_b32 v1, v0
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-CU-NEXT: ds_read_b32 v1, v0
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_load:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_read_b32 v1, v0
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-WGP-NEXT: ds_load_b32 v1, v0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-CU-NEXT: ds_load_b32 v1, v0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-WGP-NEXT: ds_load_b32 v1, v0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_monotonic_load:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-CU-NEXT: ds_load_b32 v1, v0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
entry:
|
|
; The operation under test: atomic load with monotonic ordering.
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") monotonic, align 4
|
|
; Plain store of the loaded value; it gives the load a visible use.
store i32 %val, ptr addrspace(3) %out
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: an atomic i32 load from addrspace(3) memory with
; syncscope("agent-one-as") and acquire ordering. The loaded value is then
; written to %out with an ordinary (non-atomic) store.
;
; The ';'-prefixed assertion blocks that sit between the two signature lines
; are autogenerated (see the NOTE at the top of this file); regenerate them
; with the update script rather than editing them by hand. Every expected
; block here is a DS load, a wait, and a DS store; none of them contains a
; buffer_*inv* or global_inv instruction, even though the ordering is acquire.
define amdgpu_kernel void @local_agent_one_as_acquire_load(
|
|
; GFX6-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr4
|
|
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr5
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX6-NEXT: ds_read_b32 v1, v0
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX7-NEXT: ds_read_b32 v1, v0
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-WGP-NEXT: ds_read_b32 v1, v0
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX10-CU-NEXT: ds_read_b32 v1, v0
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_load:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_read_b32 v1, v0
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-WGP-NEXT: ds_load_b32 v1, v0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX11-CU-NEXT: ds_load_b32 v1, v0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-WGP-NEXT: ds_load_b32 v1, v0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acquire_load:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
|
|
; GFX12-CU-NEXT: ds_load_b32 v1, v0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
entry:
|
|
; The operation under test: atomic load with acquire ordering.
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") acquire, align 4
|
|
; Plain store of the loaded value; it gives the load a visible use.
store i32 %val, ptr addrspace(3) %out
|
|
ret void
|
|
}
|
|
|
|
; Sequentially consistent atomic i32 load from an addrspace(3) pointer at
; syncscope "agent-one-as", followed by a non-atomic store of the loaded
; value to a second addrspace(3) pointer.
;
; What the autogenerated assertions below pin down, for every RUN prefix:
;   * the two kernel arguments are fetched with scalar loads and waited on;
;   * the atomic load is a plain ds_read_b32 (ds_load_b32 on the GFX11 and
;     GFX12 prefixes);
;   * the only wait between the read and the write is s_waitcnt lgkmcnt(0)
;     (s_wait_dscnt 0x0 on the GFX12 prefixes);
;   * no cache-control instructions appear anywhere in the kernel.
; The GFX6, GFX7 and SKIP-CACHE-INV sequences additionally set m0 to -1
; before each DS access.
;
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
define amdgpu_kernel void @local_agent_one_as_seq_cst_load(
; GFX6-LABEL: local_agent_one_as_seq_cst_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr4
; GFX6-NEXT: ; kill: def $sgpr6 killed $sgpr5
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: ds_read_b32 v1, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_seq_cst_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: ds_read_b32 v1, v0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: ds_read_b32 v1, v0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: ds_read_b32 v1, v0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: ds_read_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_load:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_load:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: ds_load_b32 v1, v0
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: ds_load_b32 v1, v0
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: ds_load_b32 v1, v0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: ds_load_b32 v1, v0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
  ; Operation under test: seq_cst atomic load at agent-one-as scope.
  %val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") seq_cst, align 4
  ; Non-atomic store of the loaded value to %out.
  store i32 %val, ptr addrspace(3) %out
  ret void
}
|
|
|
|
; Unordered atomic i32 store of kernel argument %in to an addrspace(3)
; pointer at syncscope "agent-one-as".
;
; What the autogenerated assertions below pin down, for every RUN prefix:
;   * both kernel arguments are fetched with scalar loads, followed by one
;     wait (s_waitcnt lgkmcnt(0), or s_wait_kmcnt 0x0 on the GFX12 prefixes);
;   * the atomic store is a single ds_write_b32 (ds_store_b32 on the GFX11
;     and GFX12 prefixes);
;   * nothing is emitted between that store and s_endpgm, and no
;     cache-control instructions appear.
; The GFX6, GFX7 and SKIP-CACHE-INV sequences additionally set m0 to -1
; before the DS access.
;
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
define amdgpu_kernel void @local_agent_one_as_unordered_store(
; GFX6-LABEL: local_agent_one_as_unordered_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_unordered_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_unordered_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_unordered_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_unordered_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_unordered_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_unordered_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_unordered_store:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_unordered_store:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_unordered_store:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_unordered_store:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_unordered_store:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_unordered_store:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    i32 %in, ptr addrspace(3) %out) {
entry:
  ; Operation under test: unordered atomic store at agent-one-as scope.
  store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") unordered, align 4
  ret void
}
|
|
|
|
; Monotonic atomic i32 store of kernel argument %in to an addrspace(3)
; pointer at syncscope "agent-one-as".
;
; What the autogenerated assertions below pin down, for every RUN prefix:
;   * both kernel arguments are fetched with scalar loads, followed by one
;     wait (s_waitcnt lgkmcnt(0), or s_wait_kmcnt 0x0 on the GFX12 prefixes);
;   * the atomic store is a single ds_write_b32 (ds_store_b32 on the GFX11
;     and GFX12 prefixes);
;   * nothing is emitted between that store and s_endpgm, and no
;     cache-control instructions appear.
; The GFX6, GFX7 and SKIP-CACHE-INV sequences additionally set m0 to -1
; before the DS access.
;
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
define amdgpu_kernel void @local_agent_one_as_monotonic_store(
; GFX6-LABEL: local_agent_one_as_monotonic_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_monotonic_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_monotonic_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_store:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_store:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_store:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_monotonic_store:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_store:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_monotonic_store:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    i32 %in, ptr addrspace(3) %out) {
entry:
  ; Operation under test: monotonic atomic store at agent-one-as scope.
  store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") monotonic, align 4
  ret void
}
|
|
|
|
; Release atomic i32 store of kernel argument %in to an addrspace(3)
; pointer at syncscope "agent-one-as".
;
; What the autogenerated assertions below pin down, for every RUN prefix:
;   * both kernel arguments are fetched with scalar loads, followed by one
;     wait (s_waitcnt lgkmcnt(0), or s_wait_kmcnt 0x0 on the GFX12 prefixes);
;   * the atomic store is a single ds_write_b32 (ds_store_b32 on the GFX11
;     and GFX12 prefixes);
;   * nothing is emitted between that store and s_endpgm, and no
;     cache-control instructions appear.
; The GFX6, GFX7 and SKIP-CACHE-INV sequences additionally set m0 to -1
; before the DS access.
;
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
define amdgpu_kernel void @local_agent_one_as_release_store(
; GFX6-LABEL: local_agent_one_as_release_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_release_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_release_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_release_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_release_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_release_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_store:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_store:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_release_store:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_release_store:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_release_store:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_release_store:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    i32 %in, ptr addrspace(3) %out) {
entry:
  ; Operation under test: release atomic store at agent-one-as scope.
  store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") release, align 4
  ret void
}
|
|
|
|
; Sequentially consistent atomic i32 store of kernel argument %in to an
; addrspace(3) pointer at syncscope "agent-one-as".
;
; What the autogenerated assertions below pin down, for every RUN prefix:
;   * both kernel arguments are fetched with scalar loads, followed by one
;     wait (s_waitcnt lgkmcnt(0), or s_wait_kmcnt 0x0 on the GFX12 prefixes);
;   * the atomic store is a single ds_write_b32 (ds_store_b32 on the GFX11
;     and GFX12 prefixes);
;   * nothing is emitted between that store and s_endpgm, and no
;     cache-control instructions appear.
; The GFX6, GFX7 and SKIP-CACHE-INV sequences additionally set m0 to -1
; before the DS access.
;
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
define amdgpu_kernel void @local_agent_one_as_seq_cst_store(
; GFX6-LABEL: local_agent_one_as_seq_cst_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_seq_cst_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_store:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_store:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_store:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_store:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_store:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_store:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    i32 %in, ptr addrspace(3) %out) {
entry:
  ; Operation under test: seq_cst atomic store at agent-one-as scope.
  store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") seq_cst, align 4
  ret void
}
|
|
|
|
; Test: volatile atomicrmw xchg through an addrspace(3) pointer with
; syncscope("agent-one-as") and monotonic ordering; the result %val is unused.
; Each expected sequence below loads the two kernel arguments, waits for them,
; copies them into v0/v1 and issues a single DS exchange (ds_wrxchg_rtn_b32, or
; ds_storexchg_rtn_b32 in the gfx11/gfx12 groups). Some groups also set m0 to
; -1 before the wait. s_endpgm is expected immediately after the exchange.
define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw(
; GFX6-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_monotonic_atomicrmw:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") monotonic
  ret void
}
|
|
|
|
; Test: volatile atomicrmw xchg through an addrspace(3) pointer with
; syncscope("agent-one-as") and acquire ordering; the result %val is unused.
; Each expected sequence below loads the two kernel arguments, waits for them,
; copies them into v0/v1 and issues a single DS exchange (ds_wrxchg_rtn_b32, or
; ds_storexchg_rtn_b32 in the gfx11/gfx12 groups). Some groups also set m0 to
; -1 before the wait. s_endpgm is expected immediately after the exchange, so
; no further instruction follows the atomic in any group.
define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw(
; GFX6-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_acquire_atomicrmw:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acquire
  ret void
}
|
|
|
|
; Test: volatile atomicrmw xchg through an addrspace(3) pointer with
; syncscope("agent-one-as") and release ordering; the result %val is unused.
; Each expected sequence below loads the two kernel arguments, waits for them,
; copies them into v0/v1 and issues a single DS exchange (ds_wrxchg_rtn_b32, or
; ds_storexchg_rtn_b32 in the gfx11/gfx12 groups). Some groups also set m0 to
; -1 before the wait. The exchange is expected directly after the two VGPR
; copies, and s_endpgm directly after the exchange.
define amdgpu_kernel void @local_agent_one_as_release_atomicrmw(
; GFX6-LABEL: local_agent_one_as_release_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_release_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_release_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_release_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_release_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_release_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_atomicrmw:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_atomicrmw:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_release_atomicrmw:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_release_atomicrmw:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_release_atomicrmw:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_release_atomicrmw:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") release
  ret void
}
|
|
|
|
; Test: volatile atomicrmw xchg through an addrspace(3) pointer with
; syncscope("agent-one-as") and acq_rel ordering; the result %val is unused.
; Each expected sequence below loads the two kernel arguments, waits for them,
; copies them into v0/v1 and issues a single DS exchange (ds_wrxchg_rtn_b32, or
; ds_storexchg_rtn_b32 in the gfx11/gfx12 groups). Some groups also set m0 to
; -1 before the wait. The exchange is expected directly after the two VGPR
; copies, and s_endpgm directly after the exchange.
define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw(
; GFX6-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_acq_rel_atomicrmw:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acq_rel
  ret void
}
|
|
|
|
; Test: volatile atomicrmw xchg through an addrspace(3) pointer with
; syncscope("agent-one-as") and seq_cst ordering; the result %val is unused.
; Each expected sequence below loads the two kernel arguments, waits for them,
; copies them into v0/v1 and issues a single DS exchange (ds_wrxchg_rtn_b32, or
; ds_storexchg_rtn_b32 in the gfx11/gfx12 groups). Some groups also set m0 to
; -1 before the wait. The exchange is expected directly after the two VGPR
; copies, and s_endpgm directly after the exchange.
define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw(
; GFX6-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr5
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s5
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s5
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_atomicrmw:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") seq_cst
  ret void
}
|
|
|
|
define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw(
|
|
; GFX6-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in) {
|
|
entry:
|
|
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acquire
|
|
store i32 %val, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Exchanges %in into the LDS (addrspace(3)) word at %out using a volatile
; atomicrmw xchg with acq_rel ordering and syncscope "agent-one-as", then
; stores the value returned by the exchange back to %out.
;
; Every expected sequence below consists only of the kernel-argument loads,
; the DS exchange-with-return, a wait placed before the store that consumes
; the returned value, and the DS store itself; no cache maintenance
; instructions appear for any target.
; NOTE(review): presumably the acq_rel ordering needs nothing extra here
; because LDS is not visible beyond the work-group - confirm against the
; AMDGPU memory model documentation.
define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw(
; GFX6-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT:    ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT:    s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT:    s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX942-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT:    s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX942-TGSPLIT:       ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT:    s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    ds_storexchg_rtn_b32 v1, v0, v1
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    ds_store_b32 v0, v1
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    ds_storexchg_rtn_b32 v1, v0, v1
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    ds_store_b32 v0, v1
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    ds_storexchg_rtn_b32 v1, v0, v1
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    s_wait_dscnt 0x0
; GFX12-WGP-NEXT:    ds_store_b32 v0, v1
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    ds_storexchg_rtn_b32 v1, v0, v1
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    s_wait_dscnt 0x0
; GFX12-CU-NEXT:    ds_store_b32 v0, v1
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acq_rel
  store i32 %val, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Exchanges %in into the LDS (addrspace(3)) word at %out using a volatile
; atomicrmw xchg with seq_cst ordering and syncscope "agent-one-as", then
; stores the value returned by the exchange back to %out.
;
; Every expected sequence below consists only of the kernel-argument loads,
; the DS exchange-with-return, a wait placed before the store that consumes
; the returned value, and the DS store itself; no cache maintenance
; instructions appear for any target.
; NOTE(review): presumably the seq_cst ordering needs nothing extra here
; because LDS is not visible beyond the work-group - confirm against the
; AMDGPU memory model documentation.
define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw(
; GFX6-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT:    ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT:    s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT:    s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX942-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT:    s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX942-TGSPLIT:       ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT:    s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    ds_storexchg_rtn_b32 v1, v0, v1
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    ds_store_b32 v0, v1
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    ds_storexchg_rtn_b32 v1, v0, v1
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    ds_store_b32 v0, v1
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    ds_storexchg_rtn_b32 v1, v0, v1
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    s_wait_dscnt 0x0
; GFX12-WGP-NEXT:    ds_store_b32 v0, v1
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    ds_storexchg_rtn_b32 v1, v0, v1
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    s_wait_dscnt 0x0
; GFX12-CU-NEXT:    ds_store_b32 v0, v1
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(3) %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") seq_cst
  store i32 %val, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Performs a volatile cmpxchg with monotonic success and failure orderings
; and syncscope "agent-one-as" on the LDS (addrspace(3)) word four i32
; elements past %out, comparing against %old and writing %in. The result of
; the cmpxchg is not used.
;
; Every expected sequence below consists only of the three kernel-argument
; loads, the register moves, and a single non-returning DS compare-and-store
; with "offset:16" (the folded getelementptr); no wait or cache maintenance
; instruction follows the atomic for any target.
define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX6-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT:    s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT:    s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s6
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    v_mov_b32_e32 v2, s4
; GFX6-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT:    s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v2, s4
; GFX7-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT:    s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT:    s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT:    s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX942-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT:    s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX942-TGSPLIT:       ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT:    s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT:    ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT:    ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT:    ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT:    s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT:    ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT:    s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
  ret void
}
|
|
|
|
define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
|
|
ret void
|
|
}
|
|
|
|
; Kernel under test: a volatile 32-bit cmpxchg on an addrspace(3) pointer at
; syncscope "agent-one-as", with release ordering on success and monotonic
; ordering on failure.
;
; For every configuration listed below, the expected code after the
; kernel-argument loads is the three operand moves, one DS compare-and-store
; at offset:16, and then s_endpgm.
define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg(
; GFX6-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 array is byte offset 16, which is the
  ; "offset:16" operand in the expected DS instruction.
  %slot = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; %old is the comparand and %in the replacement value. The returned
  ; { i32, i1 } pair has no users.
  %swap = cmpxchg volatile ptr addrspace(3) %slot, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
  ret void
}
|
|
|
|
; Kernel under test: a volatile 32-bit cmpxchg on an addrspace(3) pointer at
; syncscope "agent-one-as", with acq_rel ordering on success and monotonic
; ordering on failure.
;
; For every configuration listed below, the expected code after the
; kernel-argument loads is the three operand moves, one DS compare-and-store
; at offset:16, and then s_endpgm.
define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX6-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 array is byte offset 16, which is the
  ; "offset:16" operand in the expected DS instruction.
  %slot = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; %old is the comparand and %in the replacement value. The returned
  ; { i32, i1 } pair has no users.
  %swap = cmpxchg volatile ptr addrspace(3) %slot, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
  ret void
}
|
|
|
|
; Kernel under test: a volatile 32-bit cmpxchg on an addrspace(3) pointer at
; syncscope "agent-one-as", with seq_cst ordering on success and monotonic
; ordering on failure.
;
; For every configuration listed below, the expected code after the
; kernel-argument loads is the three operand moves, one DS compare-and-store
; at offset:16, and then s_endpgm.
define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX6-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 array is byte offset 16, which is the
  ; "offset:16" operand in the expected DS instruction.
  %slot = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; %old is the comparand and %in the replacement value. The returned
  ; { i32, i1 } pair has no users.
  %swap = cmpxchg volatile ptr addrspace(3) %slot, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
  ret void
}
|
|
|
|
; Kernel under test: a volatile 32-bit cmpxchg on an addrspace(3) pointer at
; syncscope "agent-one-as", with monotonic ordering on success and acquire
; ordering on failure.
;
; For every configuration listed below, the expected code after the
; kernel-argument loads is the three operand moves, one DS compare-and-store
; at offset:16, and then s_endpgm.
define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_cmpxchg(
; GFX6-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 array is byte offset 16, which is the
  ; "offset:16" operand in the expected DS instruction.
  %slot = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; %old is the comparand and %in the replacement value. The returned
  ; { i32, i1 } pair has no users.
  %swap = cmpxchg volatile ptr addrspace(3) %slot, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire
  ret void
}
|
|
|
|
; Test case: 32-bit cmpxchg on a local (addrspace(3)) pointer with syncscope
; "agent-one-as", success ordering acquire and failure ordering acquire.
; For every checked configuration the expected code is three scalar loads
; (presumably the kernel arguments), one wait on them, three SGPR-to-VGPR
; copies and a single DS compare-and-store at offset:16; the gfx600/gfx700
; configurations additionally set m0 to -1. No further wait or cache
; maintenance instruction is expected after the atomic.
define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 is a 16-byte displacement, matching the offset:16
; operand in the expected DS instruction.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; %val is never read; the cmpxchg is marked volatile.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
|
|
ret void
|
|
}
|
|
|
|
; Test case: 32-bit cmpxchg on a local (addrspace(3)) pointer with syncscope
; "agent-one-as", success ordering release and failure ordering acquire.
; For every checked configuration the expected code is three scalar loads
; (presumably the kernel arguments), one wait on them, three SGPR-to-VGPR
; copies and a single DS compare-and-store at offset:16; the gfx600/gfx700
; configurations additionally set m0 to -1. No further wait or cache
; maintenance instruction is expected around the atomic.
define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_release_acquire_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 is a 16-byte displacement, matching the offset:16
; operand in the expected DS instruction.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; %val is never read; the cmpxchg is marked volatile.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
|
|
ret void
|
|
}
|
|
|
|
; Test case: 32-bit cmpxchg on a local (addrspace(3)) pointer with syncscope
; "agent-one-as", success ordering acq_rel and failure ordering acquire.
; For every checked configuration the expected code is three scalar loads
; (presumably the kernel arguments), one wait on them, three SGPR-to-VGPR
; copies and a single DS compare-and-store at offset:16; the gfx600/gfx700
; configurations additionally set m0 to -1. No further wait or cache
; maintenance instruction is expected around the atomic.
define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 is a 16-byte displacement, matching the offset:16
; operand in the expected DS instruction.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; %val is never read; the cmpxchg is marked volatile.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
|
|
ret void
|
|
}
|
|
|
|
; Test case: 32-bit cmpxchg on a local (addrspace(3)) pointer with syncscope
; "agent-one-as", success ordering seq_cst and failure ordering acquire.
; For every checked configuration the expected code is three scalar loads
; (presumably the kernel arguments), one wait on them, three SGPR-to-VGPR
; copies and a single DS compare-and-store at offset:16; the gfx600/gfx700
; configurations additionally set m0 to -1. No further wait or cache
; maintenance instruction is expected around the atomic.
define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 is a 16-byte displacement, matching the offset:16
; operand in the expected DS instruction.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; %val is never read; the cmpxchg is marked volatile.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
|
|
ret void
|
|
}
|
|
|
|
; Local-memory (addrspace 3) compare-exchange at syncscope "agent-one-as" with
; success ordering monotonic and failure ordering seq_cst.
; The kernel steps %out forward by 4 i32 elements (the 16-byte offset seen in
; the DS instruction), stores %in if the current value equals %old, and
; discards the cmpxchg result.
; For every check prefix below the expected sequence is only scalar loads, one
; wait, register moves, the DS compare-store and s_endpgm.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX6-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst
  ret void
}
|
|
|
|
; Local-memory (addrspace 3) compare-exchange at syncscope "agent-one-as" with
; success ordering acquire and failure ordering seq_cst.
; The kernel steps %out forward by 4 i32 elements (the 16-byte offset seen in
; the DS instruction), stores %in if the current value equals %old, and
; discards the cmpxchg result.
; For every check prefix below the expected sequence is only scalar loads, one
; wait, register moves, the DS compare-store and s_endpgm.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX6-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst
  ret void
}
|
|
|
|
; Local-memory (addrspace 3) compare-exchange at syncscope "agent-one-as" with
; success ordering release and failure ordering seq_cst.
; The kernel steps %out forward by 4 i32 elements (the 16-byte offset seen in
; the DS instruction), stores %in if the current value equals %old, and
; discards the cmpxchg result.
; For every check prefix below the expected sequence is only scalar loads, one
; wait, register moves, the DS compare-store and s_endpgm.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_one_as_release_seq_cst_cmpxchg(
; GFX6-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst
  ret void
}
|
|
|
|
; Local-memory (addrspace 3) compare-exchange at syncscope "agent-one-as" with
; success ordering acq_rel and failure ordering seq_cst.
; The kernel steps %out forward by 4 i32 elements (the 16-byte offset seen in
; the DS instruction), stores %in if the current value equals %old, and
; discards the cmpxchg result.
; For every check prefix below the expected sequence is only scalar loads, one
; wait, register moves, the DS compare-store and s_endpgm.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX6-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst
  ret void
}
|
|
|
|
define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr6
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s5
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
|
|
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: returning cmpxchg on local memory (addrspace(3)) with
; syncscope("agent-one-as"), monotonic success ordering and monotonic failure
; ordering. It compare-and-swaps the i32 four elements past %out and stores
; the value produced by the cmpxchg back to %out.
;
; The autogenerated assertions below expect, for every RUN configuration:
; three scalar loads of the kernel arguments, one returning DS compare-store
; at offset 16, a wait, and a DS write of the returned register. Apart from
; register moves (and the m0 setup on the gfx600/gfx700 runs) nothing else is
; expected before s_endpgm.
;
; Review note - assuming the kernel arguments are laid out in declaration
; order (%out, %in, %old), ds_cmpst_rtn_b32 receives (%old, %in) as its two
; data operands while ds_cmpstore_rtn_b32 receives (%in, %old). Confirm
; against the ISA documents before hand-editing; normally these lines are
; regenerated with utils/update_llc_test_checks.py instead.
define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 index 4 from %out, i.e. the byte offset 16 that appears on the expected DS compare-store.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; %old is the comparand and %in is the replacement value (cmpxchg operand order per the LLVM LangRef).
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
|
|
; Field 0 of the { i32, i1 } result is the i32 value read from memory; storing it gives the result a use
; (presumably what makes the returning "_rtn" form the expected instruction - verify if this matters).
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: returning cmpxchg on local memory (addrspace(3)) with
; syncscope("agent-one-as"), acquire success ordering and monotonic failure
; ordering. It compare-and-swaps the i32 four elements past %out and stores
; the value produced by the cmpxchg back to %out.
;
; The autogenerated assertions below expect, for every RUN configuration:
; three scalar loads of the kernel arguments, one returning DS compare-store
; at offset 16, a wait, and a DS write of the returned register. Apart from
; register moves (and the m0 setup on the gfx600/gfx700 runs) nothing else is
; expected before s_endpgm.
;
; Review note - assuming the kernel arguments are laid out in declaration
; order (%out, %in, %old), ds_cmpst_rtn_b32 receives (%old, %in) as its two
; data operands while ds_cmpstore_rtn_b32 receives (%in, %old). Confirm
; against the ISA documents before hand-editing; normally these lines are
; regenerated with utils/update_llc_test_checks.py instead.
define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 index 4 from %out, i.e. the byte offset 16 that appears on the expected DS compare-store.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; %old is the comparand and %in is the replacement value (cmpxchg operand order per the LLVM LangRef).
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
|
|
; Field 0 of the { i32, i1 } result is the i32 value read from memory; storing it gives the result a use
; (presumably what makes the returning "_rtn" form the expected instruction - verify if this matters).
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: returning cmpxchg on local memory (addrspace(3)) with
; syncscope("agent-one-as"), release success ordering and monotonic failure
; ordering. It compare-and-swaps the i32 four elements past %out and stores
; the value produced by the cmpxchg back to %out.
;
; The autogenerated assertions below expect, for every RUN configuration:
; three scalar loads of the kernel arguments, one returning DS compare-store
; at offset 16, a wait, and a DS write of the returned register. Apart from
; register moves (and the m0 setup on the gfx600/gfx700 runs) nothing else is
; expected before s_endpgm.
;
; Review note - assuming the kernel arguments are laid out in declaration
; order (%out, %in, %old), ds_cmpst_rtn_b32 receives (%old, %in) as its two
; data operands while ds_cmpstore_rtn_b32 receives (%in, %old). Confirm
; against the ISA documents before hand-editing; normally these lines are
; regenerated with utils/update_llc_test_checks.py instead.
define amdgpu_kernel void @local_agent_one_as_release_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 index 4 from %out, i.e. the byte offset 16 that appears on the expected DS compare-store.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; %old is the comparand and %in is the replacement value (cmpxchg operand order per the LLVM LangRef).
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
|
|
; Field 0 of the { i32, i1 } result is the i32 value read from memory; storing it gives the result a use
; (presumably what makes the returning "_rtn" form the expected instruction - verify if this matters).
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test: returning cmpxchg on an addrspace(3) pointer with syncscope
; "agent-one-as", success ordering acq_rel and failure ordering monotonic.
; For every RUN prefix the expected code is the three kernel-argument loads,
; one returning DS compare-store at offset:16, a wait before the result is
; used, and the DS store of that result. No cache-invalidate or write-back
; instructions appear in the checks below.
; The assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing them by hand.
define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 from %out, i.e. the 16-byte offset seen as offset:16
; in the expected DS instructions above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Volatile compare-and-swap of %old -> %in at agent-one-as scope; success
; ordering is acq_rel, failure ordering is monotonic.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
|
|
; Field 0 of the cmpxchg result pair is the value read from memory.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; Store that value through %out; the checks above expect the returning
; (_rtn) form of the DS compare-store.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test: returning cmpxchg on an addrspace(3) pointer with syncscope
; "agent-one-as", success ordering seq_cst and failure ordering monotonic.
; For every RUN prefix the expected code is the three kernel-argument loads,
; one returning DS compare-store at offset:16, a wait before the result is
; used, and the DS store of that result. No cache-invalidate or write-back
; instructions appear in the checks below.
; The assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing them by hand.
define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 from %out, i.e. the 16-byte offset seen as offset:16
; in the expected DS instructions above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Volatile compare-and-swap of %old -> %in at agent-one-as scope; success
; ordering is seq_cst, failure ordering is monotonic.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
|
|
; Field 0 of the cmpxchg result pair is the value read from memory.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; Store that value through %out; the checks above expect the returning
; (_rtn) form of the DS compare-store.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test: returning cmpxchg on an addrspace(3) pointer with syncscope
; "agent-one-as", success ordering monotonic and failure ordering acquire.
; For every RUN prefix the expected code is the three kernel-argument loads,
; one returning DS compare-store at offset:16, a wait before the result is
; used, and the DS store of that result. No cache-invalidate or write-back
; instructions appear in the checks below.
; The assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing them by hand.
define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; i32 element index 4 from %out, i.e. the 16-byte offset seen as offset:16
; in the expected DS instructions above.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Volatile compare-and-swap of %old -> %in at agent-one-as scope; success
; ordering is monotonic, failure ordering is acquire.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire
|
|
; Field 0 of the cmpxchg result pair is the value read from memory.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; Store that value through %out; the checks above expect the returning
; (_rtn) form of the DS compare-store.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
|
|
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: a value-returning `cmpxchg volatile` on a ptr addrspace(3)
; location with syncscope("agent-one-as"), success ordering `release` and
; failure ordering `acquire`. The value loaded by the cmpxchg is then stored
; through %out.
; The FileCheck assertion groups placed between the function header and its
; parameter list hold the expected llc -O0 output, one group per check prefix
; used in the file header. They are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Address of i32 element 4 past %out, i.e. 16 bytes; this matches the
; offset of 16 on the DS compare-and-store instruction in the assertions.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Compare the value at %gep with %old and, if equal, replace it with %in.
; The result is a pair { loaded value, success flag }.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
|
|
; Field 0 of the pair is the value read from memory by the cmpxchg.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; Plain store of that loaded value to the base pointer %out.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test kernel: a value-returning `cmpxchg volatile` on a ptr addrspace(3)
; location with syncscope("agent-one-as"), success ordering `acq_rel` and
; failure ordering `acquire`. The value loaded by the cmpxchg is then stored
; through %out.
; The FileCheck assertion groups placed between the function header and its
; parameter list hold the expected llc -O0 output, one group per check prefix
; used in the file header. They are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
; Address of i32 element 4 past %out, i.e. 16 bytes; this matches the
; offset of 16 on the DS compare-and-store instruction in the assertions.
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
; Compare the value at %gep with %old and, if equal, replace it with %in.
; The result is a pair { loaded value, success flag }.
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
|
|
; Field 0 of the pair is the value read from memory by the cmpxchg.
%val0 = extractvalue { i32, i1 } %val, 0
|
|
; Plain store of that loaded value to the base pointer %out.
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg(
|
|
; GFX6-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX6: ; %bb.0: ; %entry
|
|
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
|
|
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX6-NEXT: s_mov_b32 m0, -1
|
|
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX6-NEXT: ds_write_b32 v0, v1
|
|
; GFX6-NEXT: s_endpgm
|
|
;
|
|
; GFX7-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX7: ; %bb.0: ; %entry
|
|
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
|
|
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX7-NEXT: s_mov_b32 m0, -1
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX7-NEXT: ds_write_b32 v0, v1
|
|
; GFX7-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX10-WGP: ; %bb.0: ; %entry
|
|
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX10-CU: ; %bb.0: ; %entry
|
|
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-CU-NEXT: ds_write_b32 v0, v1
|
|
; GFX10-CU-NEXT: s_endpgm
|
|
;
|
|
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; SKIP-CACHE-INV: ; %bb.0: ; %entry
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
|
|
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
|
|
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
|
|
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
|
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
|
; SKIP-CACHE-INV-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
|
|
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
|
|
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX942-TGSPLIT: ; %bb.0: ; %entry
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
|
|
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
|
; GFX942-TGSPLIT-NEXT: s_endpgm
|
|
;
|
|
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX11-WGP: ; %bb.0: ; %entry
|
|
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX11-CU: ; %bb.0: ; %entry
|
|
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-CU-NEXT: s_endpgm
|
|
;
|
|
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX12-WGP: ; %bb.0: ; %entry
|
|
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-WGP-NEXT: s_endpgm
|
|
;
|
|
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
|
|
; GFX12-CU: ; %bb.0: ; %entry
|
|
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
|
|
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
|
|
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
|
|
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
|
|
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
|
|
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
|
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
|
; GFX12-CU-NEXT: s_endpgm
|
|
ptr addrspace(3) %out, i32 %in, i32 %old) {
|
|
entry:
|
|
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
|
|
%val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
|
|
%val0 = extractvalue { i32, i1 } %val, 0
|
|
store i32 %val0, ptr addrspace(3) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Returning cmpxchg on an addrspace(3) pointer with syncscope("agent-one-as"),
; success ordering monotonic and failure ordering seq_cst. The value read by
; the cmpxchg is extracted from the result pair and stored back through %out.
;
; The assertions inside this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script instead of
; editing them by hand. For every check prefix the expected code is only the
; DS compare-and-swap, a wait for its result, and the DS store of that result.
define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX6-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s6
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of i32 is byte offset 16, which shows up as offset:16 on
  ; the expected DS compare-and-swap instruction for every target.
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; Compare the word at %gep with %old and write %in on a match.
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst
  ; Field 0 of the { i32, i1 } result is the value that was read from memory.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Returning cmpxchg on an addrspace(3) pointer with syncscope("agent-one-as"),
; success ordering acquire and failure ordering seq_cst. The value read by the
; cmpxchg is extracted from the result pair and stored back through %out.
;
; The assertions inside this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script instead of
; editing them by hand. For every check prefix the expected code is only the
; DS compare-and-swap, a wait for its result, and the DS store of that result.
define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX6-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s6
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of i32 is byte offset 16, which shows up as offset:16 on
  ; the expected DS compare-and-swap instruction for every target.
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; Compare the word at %gep with %old and write %in on a match.
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst
  ; Field 0 of the { i32, i1 } result is the value that was read from memory.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Returning cmpxchg on an addrspace(3) pointer with syncscope("agent-one-as"),
; success ordering release and failure ordering seq_cst. The value read by the
; cmpxchg is extracted from the result pair and stored back through %out.
;
; The assertions inside this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script instead of
; editing them by hand. For every check prefix the expected code is only the
; DS compare-and-swap, a wait for its result, and the DS store of that result.
define amdgpu_kernel void @local_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX6-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s6
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of i32 is byte offset 16, which shows up as offset:16 on
  ; the expected DS compare-and-swap instruction for every target.
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  ; Compare the word at %gep with %old and write %in on a match.
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst
  ; Field 0 of the { i32, i1 } result is the value that was read from memory.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Volatile cmpxchg on a local (addrspace(3)) pointer with
; syncscope("agent-one-as"), success ordering acq_rel and failure ordering
; seq_cst. The value loaded by the cmpxchg (field 0 of the result pair) is
; stored back to %out so that the returning form of the atomic is selected.
;
; The assertion lines that follow the signature are the expected llc output
; for each configuration listed at the top of the file. They are produced by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX6-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s6
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; i32 element index 4 is byte offset 16, which the expected output shows
  ; folded into the compare-and-swap as offset:16.
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst
  ; Field 0 of the cmpxchg result is the value loaded from memory.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(3) %out, align 4
  ret void
}
|
|
|
|
; Volatile cmpxchg on a local (addrspace(3)) pointer with
; syncscope("agent-one-as"), success ordering seq_cst and failure ordering
; seq_cst. The value loaded by the cmpxchg (field 0 of the result pair) is
; stored back to %out so that the returning form of the atomic is selected.
;
; The assertion lines that follow the signature are the expected llc output
; for each configuration listed at the top of the file. They are produced by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX6-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX6-NEXT: ; kill: def $sgpr5 killed $sgpr4
; GFX6-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX6-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dword s5, s[8:9], 0x1
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s6
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s5
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s5
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[4:5], 0x1
; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x4
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s5
; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX942-TGSPLIT: ; %bb.0: ; %entry
; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1
; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX942-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX12-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
  ; i32 element index 4 is byte offset 16, which the expected output shows
  ; folded into the compare-and-swap as offset:16.
  %gep = getelementptr i32, ptr addrspace(3) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(3) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
  ; Field 0 of the cmpxchg result is the value loaded from memory.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(3) %out, align 4
  ret void
}
|