llvm-project/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll
David Green 5a81a559d6
[GISel] Explicitly disable BF16 tablegen patterns. (#124113)
We currently have an issue where bf16 patters can be used to match fp16
types, as GISel does not know about the difference between the two. This
patch explicitly disables them to make sure that they are never used.

The opposite can also happen too, where fp16 patterns are used for
operators that should be bf16. So this also changes any operations with
bf16 types to now cause a fallback to SDAG.

The pass setup for GISel has been slightly adjusted to make sure that a
verify pass does not get added between AMD-SDAG and SIFixSGPRCopiesPass,
which otherwise can cause verifier issues when falling back.
2025-01-27 22:21:12 +00:00

182 lines
9.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=0 | FileCheck %s -check-prefix=GFX12-SDAG
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 -global-isel-abort=2 | FileCheck %s -check-prefix=GFX12-GISEL
declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32)
declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32)
define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret_offset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
; GFX12-GISEL-NEXT: s_endpgm
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_noret:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
; GFX12-GISEL-NEXT: s_endpgm
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret_offset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
ret <2 x half> %ret
}
define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_ret:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret <2 x half> %ret
}
define amdgpu_ps float @struct_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2f16_ret:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_ret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
%r = bitcast <2 x half> %orig to float
ret float %r
}
define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2f16_noret:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_noret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-GISEL-NEXT: s_endpgm
%orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: flat_store_b32 v[1:2], v0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT: s_wait_dscnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: flat_store_b32 v[1:2], v0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT: s_wait_dscnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
store <2 x bfloat> %orig, ptr null
ret float 1.0
}
define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-GISEL-NEXT: s_endpgm
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2bf16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2bf16:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX12-GISEL-NEXT: s_endpgm
%ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: flat_store_b32 v[1:2], v0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT: s_wait_dscnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: flat_store_b32 v[1:2], v0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT: s_wait_dscnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
store <2 x bfloat> %orig, ptr null
ret float 1.0
}