llvm-project/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll
David Green 5a81a559d6
[GISel] Explicitly disable BF16 tablegen patterns. (#124113)
We currently have an issue where bf16 patterns can be used to match fp16
types, as GISel does not know about the difference between the two. This
patch explicitly disables them to make sure that they are never used.

The opposite can also happen, where fp16 patterns are used for
operators that should be bf16. So this also changes any operations with
bf16 types to now cause a fallback to SDAG.

The pass setup for GISel has been slightly adjusted to make sure that a
verify pass does not get added between AMD-SDAG and SIFixSGPRCopiesPass,
which otherwise can cause verifier issues when falling back.
2025-01-27 22:21:12 +00:00

93 lines
4.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 | FileCheck %s -check-prefix=GFX950-SDAG
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -global-isel-abort=2 | FileCheck %s -check-prefix=GFX950-GISEL
; Packed-bf16 buffer atomic fadd intrinsics exercised by the functions below.
;   struct form operands: value, resource, vindex, voffset, soffset, trailing immediate
;   raw form operands:    value, resource, voffset, soffset, trailing immediate
; Every call in this file passes the constant 0 as the trailing immediate.
declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32 immarg)
; Struct-buffer packed-bf16 atomic fadd whose result is used.
; The value returned by the intrinsic is stored through a null pointer, so the
; atomic must produce its result. Both check prefixes expect the same sequence:
; the vindex/voffset pair is moved into v[2:3], then a single
; buffer_atomic_pk_add_bf16 with "idxen offen sc0" is issued, followed by a
; flat_store_dword of the returned value.
define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen sc0
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, v1
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen sc0
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: ; return to shader part epilog
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; Storing the result keeps the atomic's return value live.
store <2 x bfloat> %orig, ptr null
ret float 1.0
}
; Struct-buffer packed-bf16 atomic fadd whose result is ignored.
; %orig is never read, and both check prefixes expect
; buffer_atomic_pk_add_bf16 with "idxen offen" and no sc0 modifier,
; immediately followed by s_endpgm.
define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen
; GFX950-SDAG-NEXT: s_endpgm
;
; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, v1
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen
; GFX950-GISEL-NEXT: s_endpgm
; The call result is intentionally left unused in this variant.
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Raw-buffer packed-bf16 atomic fadd whose result is ignored.
; The raw form has no vindex operand. Both check prefixes expect a single
; buffer_atomic_pk_add_bf16 using v1 as the offset with "offen" only
; (no idxen, no sc0), followed by s_endpgm.
define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX950-SDAG-NEXT: s_endpgm
;
; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX950-GISEL-NEXT: s_endpgm
; The call result is intentionally left unused in this variant.
%ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Raw-buffer packed-bf16 atomic fadd whose result is used.
; The returned value is stored through a null pointer, so the atomic must
; produce its result. Both check prefixes expect buffer_atomic_pk_add_bf16
; with "offen sc0", then a flat_store_dword of the returned value.
define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: ; return to shader part epilog
%orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
; Storing the result keeps the atomic's return value live.
store <2 x bfloat> %orig, ptr null
ret float 1.0
}