llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.gfx90a.ll
David Green 5a81a559d6
[GISel] Explicitly disable BF16 tablegen patterns. (#124113)
We currently have an issue where bf16 patterns can be used to match fp16
types, as GISel does not know about the difference between the two. This
patch explicitly disables them to make sure that they are never used.

The opposite can also happen, where fp16 patterns are used for
operators that should be bf16. So this also changes any operations with
bf16 types to now cause a fallback to SDAG.

The pass setup for GISel has been slightly adjusted to make sure that a
verify pass does not get added between AMD-SDAG and SIFixSGPRCopiesPass,
which otherwise can cause verifier issues when falling back.
2025-01-27 22:21:12 +00:00

278 lines
11 KiB
LLVM

; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
; DPP control value 337 is valid for 64-bit DPP on gfx942
; GCN-LABEL: update_dpp_i64:
;
; GFX90A-DAG: v_mov_b32_dpp v2, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v3, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942: v_mov_b64_dpp v[2:3], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx2 v[4:5], v[2:3], off
; Applies update.dpp to a 64-bit integer: %old is the first operand, %in the
; second, the control operand is 337, and the remaining immediates are 1, 1
; and false. The result is stored through %out.
define amdgpu_ps void @update_dpp_i64(i64 %in, i64 %old, ptr addrspace(1) %out) {
  %dpp = call i64 @llvm.amdgcn.update.dpp.i64(i64 %old, i64 %in, i32 337, i32 1, i32 1, i1 false)
  store i64 %dpp, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_v2i32:
;
; GFX90A-DAG: v_mov_b32_dpp v2, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v3, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942: v_mov_b64_dpp v[2:3], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx2 v[4:5], v[2:3], off
; Same shape as the scalar 64-bit case, but the value is a <2 x i32> vector:
; update.dpp(%old, %in, 337, 1, 1, false), with the result written to %out.
define amdgpu_ps void @update_dpp_v2i32(<2 x i32> %in, <2 x i32> %old, ptr addrspace(1) %out) {
  %dpp.vec = call <2 x i32> @llvm.amdgcn.update.dpp.v2i32(<2 x i32> %old, <2 x i32> %in, i32 337, i32 1, i32 1, i1 false)
  store <2 x i32> %dpp.vec, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_v3i32:
;
; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v2 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx3
; Three-element (96-bit) vector variant: update.dpp(%old, %in, 337, 1, 1,
; false) on <3 x i32>, followed by a store of the whole vector to %out.
define amdgpu_ps void @update_dpp_v3i32(<3 x i32> %in, <3 x i32> %old, ptr addrspace(1) %out) {
  %dpp.vec = call <3 x i32> @llvm.amdgcn.update.dpp.v3i32(<3 x i32> %old, <3 x i32> %in, i32 337, i32 1, i32 1, i1 false)
  store <3 x i32> %dpp.vec, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_v4i32:
;
; GFX90A-DAG: v_mov_b32_dpp v6, v2 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v7, v3 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v4, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v5, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942-DAG: v_mov_b64_dpp v[6:7], v[2:3] row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX942-DAG: v_mov_b64_dpp v[4:5], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx4 v[8:9], v[4:7], off
; Four-element (128-bit) vector variant: update.dpp(%old, %in, 337, 1, 1,
; false) on <4 x i32>, followed by a store of the whole vector to %out.
define amdgpu_ps void @update_dpp_v4i32(<4 x i32> %in, <4 x i32> %old, ptr addrspace(1) %out) {
  %dpp.vec = call <4 x i32> @llvm.amdgcn.update.dpp.v4i32(<4 x i32> %old, <4 x i32> %in, i32 337, i32 1, i32 1, i1 false)
  store <4 x i32> %dpp.vec, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_v2i32_poison:
;
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx2 v[2:3], v[0:1], off
; <2 x i32> variant with no %old argument: the first update.dpp operand is
; poison, the second is %in, and the control operand is 337. The result is
; stored through %out.
define amdgpu_ps void @update_dpp_v2i32_poison(<2 x i32> %in, ptr addrspace(1) %out) {
  %dpp.vec = call <2 x i32> @llvm.amdgcn.update.dpp.v2i32(<2 x i32> poison, <2 x i32> %in, i32 337, i32 1, i32 1, i1 false)
  store <2 x i32> %dpp.vec, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_float:
;
; GCN: v_mov_b32_dpp v1, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dword v[2:3], v1, off
; 32-bit floating-point variant: update.dpp(%old, %in, 337, 1, 1, false) on
; float, with the result stored through %out.
define amdgpu_ps void @update_dpp_float(float %in, float %old, ptr addrspace(1) %out) {
  %dpp.f32 = call float @llvm.amdgcn.update.dpp.f32(float %old, float %in, i32 337, i32 1, i32 1, i1 false)
  store float %dpp.f32, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_double:
;
; GFX90A-DAG: v_mov_b32_dpp v2, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v3, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942: v_mov_b64_dpp v[2:3], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx2 v[4:5], v[2:3], off
; 64-bit floating-point variant: update.dpp(%old, %in, 337, 1, 1, false) on
; double, with the result stored through %out.
define amdgpu_ps void @update_dpp_double(double %in, double %old, ptr addrspace(1) %out) {
  %dpp.f64 = call double @llvm.amdgcn.update.dpp.f64(double %old, double %in, i32 337, i32 1, i32 1, i1 false)
  store double %dpp.f64, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_float_poison:
;
; GCN: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dword v[{{[0-9:]+}}], v0, off
; float variant whose first update.dpp operand is poison rather than a
; function argument; %in is the second operand and the control operand is
; 337. The result is stored through %out.
define amdgpu_ps void @update_dpp_float_poison(float %in, ptr addrspace(1) %out) {
  %dpp.f32 = call float @llvm.amdgcn.update.dpp.f32(float poison, float %in, i32 337, i32 1, i32 1, i1 false)
  store float %dpp.f32, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_double_poison:
;
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx2 v[2:3], v[0:1], off
; double variant whose first update.dpp operand is poison rather than a
; function argument; %in is the second operand and the control operand is
; 337. The result is stored through %out.
define amdgpu_ps void @update_dpp_double_poison(double %in, ptr addrspace(1) %out) {
  %dpp.f64 = call double @llvm.amdgcn.update.dpp.f64(double poison, double %in, i32 337, i32 1, i32 1, i1 false)
  store double %dpp.f64, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_v2f32_poison:
;
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx2 v[2:3], v[0:1], off
; <2 x float> variant with a poison first operand: update.dpp(poison, %in,
; 337, 1, 1, false), with the resulting vector stored through %out.
define amdgpu_ps void @update_dpp_v2f32_poison(<2 x float> %in, ptr addrspace(1) %out) {
  %dpp.vec = call <2 x float> @llvm.amdgcn.update.dpp.v2f32(<2 x float> poison, <2 x float> %in, i32 337, i32 1, i32 1, i1 false)
  store <2 x float> %dpp.vec, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_v2f16_poison:
;
; GCN: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dword v[{{[0-9:]+}}], v0, off
; <2 x half> variant (two 16-bit elements, 32 bits in total) with a poison
; first operand: update.dpp(poison, %in, 337, 1, 1, false), with the result
; stored through %out.
define amdgpu_ps void @update_dpp_v2f16_poison(<2 x half> %in, ptr addrspace(1) %out) {
  %dpp.vec = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> poison, <2 x half> %in, i32 337, i32 1, i32 1, i1 false)
  store <2 x half> %dpp.vec, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_v8f16_poison:
;
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v2, v2 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v3, v3 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942-DAG: v_mov_b64_dpp v[2:3], v[2:3] row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX942-DAG: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx4 v[4:5], v[0:3], off
; <8 x half> variant (eight 16-bit elements, 128 bits in total) with a poison
; first operand: update.dpp(poison, %in, 337, 1, 1, false), with the result
; stored through %out.
define amdgpu_ps void @update_dpp_v8f16_poison(<8 x half> %in, ptr addrspace(1) %out) {
  %dpp.vec = call <8 x half> @llvm.amdgcn.update.dpp.v8f16(<8 x half> poison, <8 x half> %in, i32 337, i32 1, i32 1, i1 false)
  store <8 x half> %dpp.vec, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_p3_poison:
;
; GCN: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dword v[{{[0-9:]+}}], v0, off
; Pointer variant for address space 3 with a poison first operand:
; update.dpp(poison, %in, 337, 1, 1, false). The resulting pointer value is
; stored through %out.
define amdgpu_ps void @update_dpp_p3_poison(ptr addrspace(3) %in, ptr addrspace(1) %out) {
  %dpp.ptr = call ptr addrspace(3) @llvm.amdgcn.update.dpp.p3(ptr addrspace(3) poison, ptr addrspace(3) %in, i32 337, i32 1, i32 1, i1 false)
  store ptr addrspace(3) %dpp.ptr, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_p0_poison:
;
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GFX942-DAG: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx2 v[2:3], v[0:1], off
; Pointer variant for the default address space with a poison first operand:
; update.dpp(poison, %in, 337, 1, 1, false). The resulting pointer value is
; stored through %out.
define amdgpu_ps void @update_dpp_p0_poison(ptr %in, ptr addrspace(1) %out) {
  %dpp.ptr = call ptr @llvm.amdgcn.update.dpp.p0(ptr poison, ptr %in, i32 337, i32 1, i32 1, i1 false)
  store ptr %dpp.ptr, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_i64_unsupported_dpp64_op:
;
; GCN-DAG: v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GCN-DAG: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_dwordx2 v[4:5], v[2:3], off
; 64-bit integer case that passes control operand 1 instead of 337. The
; checks ahead of this function expect a quad_perm pair of 32-bit moves from
; every configuration. The result is stored through %out.
define amdgpu_ps void @update_dpp_i64_unsupported_dpp64_op(i64 %in, i64 %old, ptr addrspace(1) %out) {
  %dpp = call i64 @llvm.amdgcn.update.dpp.i64(i64 %old, i64 %in, i32 1, i32 1, i32 1, i1 false)
  store i64 %dpp, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_i16:
;
; GCN: v_mov_b32_dpp v1, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_short v[2:3], v1, off
; 16-bit integer variant: update.dpp(%old, %in, 337, 1, 1, false) on i16,
; with the result stored through %out.
define amdgpu_ps void @update_dpp_i16(i16 %in, i16 %old, ptr addrspace(1) %out) {
  %dpp.i16 = call i16 @llvm.amdgcn.update.dpp.i16(i16 %old, i16 %in, i32 337, i32 1, i32 1, i1 false)
  store i16 %dpp.i16, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_half:
;
; GCN-DAG: v_mov_b32_dpp v1, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_short v[2:3], v1, off
; 16-bit half-precision variant: update.dpp(%old, %in, 337, 1, 1, false) on
; half, with the result stored through %out.
define amdgpu_ps void @update_dpp_half(half %in, half %old, ptr addrspace(1) %out) {
  %dpp.f16 = call half @llvm.amdgcn.update.dpp.f16(half %old, half %in, i32 337, i32 1, i32 1, i1 false)
  store half %dpp.f16, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_bfloat:
;
; GCN-DAG: v_mov_b32_dpp v1, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
;
; GCN: global_store_short v[2:3], v1, off
; bfloat variant: update.dpp(%old, %in, 337, 1, 1, false) on bfloat, with the
; result stored through %out.
define amdgpu_ps void @update_dpp_bfloat(bfloat %in, bfloat %old, ptr addrspace(1) %out) {
  %dpp.bf16 = call bfloat @llvm.amdgcn.update.dpp.bf16(bfloat %old, bfloat %in, i32 337, i32 1, i32 1, i1 false)
  store bfloat %dpp.bf16, ptr addrspace(1) %out
  ret void
}
; GCN-LABEL: update_dpp_v2p0_poison:
;
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x2
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x2
; GFX90A-DAG: v_mov_b32_dpp v2, v2 row_newbcast:1 row_mask:0x1 bank_mask:0x2
; GFX90A-DAG: v_mov_b32_dpp v3, v3 row_newbcast:1 row_mask:0x1 bank_mask:0x2
;
; GFX942-DAG: v_mov_b64_dpp v[2:3], v[2:3] row_newbcast:1 row_mask:0x1 bank_mask:0x2
; GFX942-DAG: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x2
;
; GCN: global_store_dwordx4 v[4:5], v[0:3], off
; Vector-of-pointers variant with a poison first operand. Unlike the other
; functions in this file, the fifth call operand is 2 rather than 1; the
; checks ahead of this function expect it to appear as bank_mask:0x2. The
; result is stored through %out.
define amdgpu_ps void @update_dpp_v2p0_poison(<2 x ptr> %in, ptr addrspace(1) %out) {
  %dpp.ptrs = call <2 x ptr> @llvm.amdgcn.update.dpp.v2p0(<2 x ptr> poison, <2 x ptr> %in, i32 337, i32 1, i32 2, i1 false)
  store <2 x ptr> %dpp.ptrs, ptr addrspace(1) %out
  ret void
}