We currently have an issue where bf16 patters can be used to match fp16 types, as GISel does not know about the difference between the two. This patch explicitly disables them to make sure that they are never used. The opposite can also happen too, where fp16 patterns are used for operators that should be bf16. So this also changes any operations with bf16 types to now cause a fallback to SDAG. The pass setup for GISel has been slightly adjusted to make sure that a verify pass does not get added between AMD-SDAG and SIFixSGPRCopiesPass, which otherwise can cause verifier issues when falling back.
278 lines
11 KiB
LLVM
278 lines
11 KiB
LLVM
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN,GFX90A %s
|
|
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN,GFX90A %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
|
|
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
|
|
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
|
|
|
|
; DPP control value 337 is valid for 64-bit DPP on gfx942
|
|
|
|
; GCN-LABEL: update_dpp_i64:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v2, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v3, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942: v_mov_b64_dpp v[2:3], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx2 v[4:5], v[2:3], off
|
|
|
|
define amdgpu_ps void @update_dpp_i64(i64 %in, i64 %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %old, i64 %in, i32 337, i32 1, i32 1, i1 0)
|
|
store i64 %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_v2i32:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v2, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v3, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942: v_mov_b64_dpp v[2:3], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx2 v[4:5], v[2:3], off
|
|
|
|
define amdgpu_ps void @update_dpp_v2i32(<2 x i32> %in, <2 x i32> %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call <2 x i32> @llvm.amdgcn.update.dpp.v2i32(<2 x i32> %old, <2 x i32> %in, i32 337, i32 1, i32 1, i1 0)
|
|
store <2 x i32> %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_v3i32:
|
|
;
|
|
; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v2 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx3
|
|
|
|
define amdgpu_ps void @update_dpp_v3i32(<3 x i32> %in, <3 x i32> %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call <3 x i32> @llvm.amdgcn.update.dpp.v3i32(<3 x i32> %old, <3 x i32> %in, i32 337, i32 1, i32 1, i1 0)
|
|
store <3 x i32> %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_v4i32:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v6, v2 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v7, v3 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v4, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v5, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942-DAG: v_mov_b64_dpp v[6:7], v[2:3] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX942-DAG: v_mov_b64_dpp v[4:5], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx4 v[8:9], v[4:7], off
|
|
|
|
define amdgpu_ps void @update_dpp_v4i32(<4 x i32> %in, <4 x i32> %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call <4 x i32> @llvm.amdgcn.update.dpp.v4i32(<4 x i32> %old, <4 x i32> %in, i32 337, i32 1, i32 1, i1 0)
|
|
store <4 x i32> %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_v2i32_poison:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx2 v[2:3], v[0:1], off
|
|
|
|
define amdgpu_ps void @update_dpp_v2i32_poison(<2 x i32> %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call <2 x i32> @llvm.amdgcn.update.dpp.v2i32(<2 x i32> poison, <2 x i32> %in, i32 337, i32 1, i32 1, i1 0)
|
|
store <2 x i32> %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_float:
|
|
;
|
|
; GCN: v_mov_b32_dpp v1, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dword v[2:3], v1, off
|
|
|
|
define amdgpu_ps void @update_dpp_float(float %in, float %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %old, float %in, i32 337, i32 1, i32 1, i1 0)
|
|
store float %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_double:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v2, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v3, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942: v_mov_b64_dpp v[2:3], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx2 v[4:5], v[2:3], off
|
|
|
|
define amdgpu_ps void @update_dpp_double(double %in, double %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call double @llvm.amdgcn.update.dpp.f64(double %old, double %in, i32 337, i32 1, i32 1, i1 0)
|
|
store double %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_float_poison:
|
|
;
|
|
; GCN: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], v0, off
|
|
|
|
define amdgpu_ps void @update_dpp_float_poison(float %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call float @llvm.amdgcn.update.dpp.f32(float poison, float %in, i32 337, i32 1, i32 1, i1 0)
|
|
store float %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_double_poison:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx2 v[2:3], v[0:1], off
|
|
|
|
define amdgpu_ps void @update_dpp_double_poison(double %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call double @llvm.amdgcn.update.dpp.f64(double poison, double %in, i32 337, i32 1, i32 1, i1 0)
|
|
store double %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_v2f32_poison:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx2 v[2:3], v[0:1], off
|
|
|
|
define amdgpu_ps void @update_dpp_v2f32_poison(<2 x float> %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call <2 x float> @llvm.amdgcn.update.dpp.v2f32(<2 x float> poison, <2 x float> %in, i32 337, i32 1, i32 1, i1 0)
|
|
store <2 x float> %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_v2f16_poison:
|
|
;
|
|
; GCN: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], v0, off
|
|
|
|
define amdgpu_ps void @update_dpp_v2f16_poison(<2 x half> %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> poison, <2 x half> %in, i32 337, i32 1, i32 1, i1 0)
|
|
store <2 x half> %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_v8f16_poison:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v2, v2 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v3, v3 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942-DAG: v_mov_b64_dpp v[2:3], v[2:3] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX942-DAG: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx4 v[4:5], v[0:3], off
|
|
|
|
define amdgpu_ps void @update_dpp_v8f16_poison(<8 x half> %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call <8 x half> @llvm.amdgcn.update.dpp.v8f16(<8 x half> poison, <8 x half> %in, i32 337, i32 1, i32 1, i1 0)
|
|
store <8 x half> %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_p3_poison:
|
|
;
|
|
; GCN: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], v0, off
|
|
|
|
define amdgpu_ps void @update_dpp_p3_poison(ptr addrspace(3) %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call ptr addrspace(3) @llvm.amdgcn.update.dpp.p3(ptr addrspace(3) poison, ptr addrspace(3) %in, i32 337, i32 1, i32 1, i1 0)
|
|
store ptr addrspace(3) %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_p0_poison:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GFX942-DAG: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx2 v[2:3], v[0:1], off
|
|
|
|
define amdgpu_ps void @update_dpp_p0_poison(ptr %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call ptr @llvm.amdgcn.update.dpp.p0(ptr poison, ptr %in, i32 337, i32 1, i32 1, i1 0)
|
|
store ptr %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_i64_unsupported_dpp64_op:
|
|
;
|
|
; GCN-DAG: v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
|
|
; GCN-DAG: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_dwordx2 v[4:5], v[2:3], off
|
|
|
|
define amdgpu_ps void @update_dpp_i64_unsupported_dpp64_op(i64 %in, i64 %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %old, i64 %in, i32 1, i32 1, i32 1, i1 0)
|
|
store i64 %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_i16:
|
|
;
|
|
; GCN: v_mov_b32_dpp v1, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_short v[2:3], v1, off
|
|
|
|
define amdgpu_ps void @update_dpp_i16(i16 %in, i16 %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call i16 @llvm.amdgcn.update.dpp.i16(i16 %old, i16 %in, i32 337, i32 1, i32 1, i1 0)
|
|
store i16 %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_half:
|
|
;
|
|
; GCN-DAG: v_mov_b32_dpp v1, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_short v[2:3], v1, off
|
|
|
|
define amdgpu_ps void @update_dpp_half(half %in, half %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call half @llvm.amdgcn.update.dpp.f16(half %old, half %in, i32 337, i32 1, i32 1, i1 0)
|
|
store half %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_bfloat:
|
|
;
|
|
; GCN-DAG: v_mov_b32_dpp v1, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x1
|
|
;
|
|
; GCN: global_store_short v[2:3], v1, off
|
|
|
|
define amdgpu_ps void @update_dpp_bfloat(bfloat %in, bfloat %old, ptr addrspace(1) %out) {
|
|
%tmp0 = call bfloat @llvm.amdgcn.update.dpp.bf16(bfloat %old, bfloat %in, i32 337, i32 1, i32 1, i1 0)
|
|
store bfloat %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: update_dpp_v2p0_poison:
|
|
;
|
|
; GFX90A-DAG: v_mov_b32_dpp v0, v0 row_newbcast:1 row_mask:0x1 bank_mask:0x2
|
|
; GFX90A-DAG: v_mov_b32_dpp v1, v1 row_newbcast:1 row_mask:0x1 bank_mask:0x2
|
|
; GFX90A-DAG: v_mov_b32_dpp v2, v2 row_newbcast:1 row_mask:0x1 bank_mask:0x2
|
|
; GFX90A-DAG: v_mov_b32_dpp v3, v3 row_newbcast:1 row_mask:0x1 bank_mask:0x2
|
|
;
|
|
; GFX942-DAG: v_mov_b64_dpp v[2:3], v[2:3] row_newbcast:1 row_mask:0x1 bank_mask:0x2
|
|
; GFX942-DAG: v_mov_b64_dpp v[0:1], v[0:1] row_newbcast:1 row_mask:0x1 bank_mask:0x2
|
|
;
|
|
; GCN: global_store_dwordx4 v[4:5], v[0:3], off
|
|
|
|
define amdgpu_ps void @update_dpp_v2p0_poison(<2 x ptr> %in, ptr addrspace(1) %out) {
|
|
%tmp0 = call <2 x ptr> @llvm.amdgcn.update.dpp.v2p0(<2 x ptr> poison, <2 x ptr> %in, i32 337, i32 1, i32 2, i1 0)
|
|
store <2 x ptr> %tmp0, ptr addrspace(1) %out
|
|
ret void
|
|
}
|