llvm-project/llvm/test/CodeGen/AMDGPU/a-v-ds-atomic-cmpxchg.ll
Matt Arsenault 1959e12e7d
AMDGPU: Add agpr variants of multi-data DS instructions (#156420)
The instruction definitions for loads and stores do not
accurately model the operand constraints of loads and stores
with AGPRs. They use AV register classes, plus
a hack in getRegClass/getOpRegClass to avoid using AGPRs or
AV classes with the multiple operand cases, but it did not
consider the 3 operand case.

Model this correctly by using separate all-VGPR and all-AGPR
variants for the cases with multiple data operands.

This does regress the assembler errors on gfx908 for the
multi-operand cases. It now reports a generic invalid-operand
error for the GPU instead of the specific message
that agpr loads and stores aren't supported.

In the future AMDGPURewriteAGPRCopyMFMA should be taught
to replace the VGPR forms with the AGPR ones.

Most of the diff is fighting the DS pseudo structure. The
mnemonic was being used as the key to SIMCInstr, which is a
collision in the AGPR case. We also need to go out of our way
to make sure we are using the gfx9+ variants of the pseudos
without the m0 use. The DS multiclasses could use a lot of
cleanup.

Fixes #155777
2025-09-04 09:13:36 +09:00

301 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; Compare value, new value and the consumer of the loaded result all use the
; "^VA" inline-asm constraint; the expected output keeps everything in VGPRs.
define void @ds_atomic_cmpxchg_i32_ret_av_av__av(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=^VA"()
  %new = call i32 asm "; def $0", "=^VA"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "^VA"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; Compare value and new value are defined with "^VA"; the loaded result is
; consumed with the "v" constraint.
define void @ds_atomic_cmpxchg_i32_ret_av_av__v(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=^VA"()
  %new = call i32 asm "; def $0", "=^VA"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "v"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; Compare value and new value are defined with "^VA"; the loaded result is
; consumed with the "a" constraint, so the expected output copies the result
; from v0 into a0 with v_accvgpr_write_b32 before the use.
define void @ds_atomic_cmpxchg_i32_ret_av_av__a(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=^VA"()
  %new = call i32 asm "; def $0", "=^VA"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "a"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; Compare value, new value and the result consumer all use the "a" constraint.
; The expected output reads both inputs out of a0/a1 into VGPRs with
; v_accvgpr_read_b32 and writes the result back to a0 with v_accvgpr_write_b32.
define void @ds_atomic_cmpxchg_i32_ret_a_a__a(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_a__a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v1, a0
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a1
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=a"()
  %new = call i32 asm "; def $0", "=a"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "a"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; Compare value and new value are defined with the "a" constraint; the loaded
; result is consumed with "v". The expected output reads both inputs out of
; a0/a1 into VGPRs with v_accvgpr_read_b32 before the DS instruction.
define void @ds_atomic_cmpxchg_i32_ret_a_a__v(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_a__v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v1, a0
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a1
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=a"()
  %new = call i32 asm "; def $0", "=a"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "v"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; The compare value is defined with "v" and the new value with "a"; the loaded
; result is consumed with "v". The expected output reads the "a" input out of
; a0 into v2 and passes it as the second data operand.
define void @ds_atomic_cmpxchg_i32_ret_v_a__v(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_a__v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=v"()
  %new = call i32 asm "; def $0", "=a"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "v"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; The compare value is defined with "a" and the new value with "v"; the loaded
; result is consumed with "v". The expected output reads the "a" input out of
; a0 into v2 and passes it as the first data operand.
define void @ds_atomic_cmpxchg_i32_ret_a_v__v(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_v__v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v2, v1 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=a"()
  %new = call i32 asm "; def $0", "=v"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "v"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; Compare value and new value are defined with "v"; the loaded result is
; consumed with "a", so the expected output copies the result from v0 into a0
; with v_accvgpr_write_b32 before the use.
define void @ds_atomic_cmpxchg_i32_ret_v_v__a(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_v__a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=v"()
  %new = call i32 asm "; def $0", "=v"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "a"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; The compare value is defined with "^VA" and the new value with "v"; the
; loaded result is consumed with "^VA". The expected output keeps everything
; in VGPRs.
define void @ds_atomic_cmpxchg_i32_ret_av_v__av(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_v__av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=^VA"()
  %new = call i32 asm "; def $0", "=v"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "^VA"(i32 %old)
  ret void
}
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; The compare value is defined with "v" and the new value with "^VA"; the
; loaded result is consumed with "^VA". The expected output keeps everything
; in VGPRs.
define void @ds_atomic_cmpxchg_i32_ret_v_av__av(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_av__av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=v"()
  %new = call i32 asm "; def $0", "=^VA"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "^VA"(i32 %old)
  ret void
}
; FIXME: Broken. The av_a__av case below is disabled (commented out).
; define void @ds_atomic_cmpxchg_i32_ret_av_a__av(ptr addrspace(3) %ptr) #0 {
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
; %data0 = call i32 asm "; def $0", "=^VA"()
; %data1 = call i32 asm "; def $0", "=a"()
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
; %result = extractvalue { i32, i1 } %pair, 0
; call void asm "; use $0", "^VA"(i32 %result)
; ret void
; }
; i32 cmpxchg through an addrspace(3) pointer at element 10 (byte offset 40).
; The compare value is defined with "a" and the new value with "^VA"; the
; loaded result is consumed with "^VA". The expected output reads the "a"
; input out of a0 into v2 and passes it as the first data operand.
define void @ds_atomic_cmpxchg_i32_ret_a_av__av(ptr addrspace(3) %ptr) #0 {
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_av__av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v1
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ds_cmpst_rtn_b32 v0, v0, v2, v1 offset:40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %elt.ptr = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
  %cmp = call i32 asm "; def $0", "=a"()
  %new = call i32 asm "; def $0", "=^VA"()
  %xchg = cmpxchg ptr addrspace(3) %elt.ptr, i32 %cmp, i32 %new seq_cst monotonic
  %old = extractvalue { i32, i1 } %xchg, 0
  call void asm "; use $0", "^VA"(i32 %old)
  ret void
}
; Attribute group #0, referenced by every function defined above: marks the
; function nounwind and sets the "amdgpu-waves-per-eu" string attribute to
; "10,10" (presumably a min,max pair of waves per execution unit; confirm
; against the AMDGPU backend documentation).
attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" }