This reverts commit 47f6a19181b426baa03182ab6a7a41e16b35301d. It breaks MIOpen, and there is no proper fix yet.
1573 lines
55 KiB
LLVM
1573 lines
55 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_wait" --filter-out "s_nop" --filter-out "s_delay_alu" --filter-out "s_setpc_b64" --version 6
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck %s --check-prefixes=GCN,GFX906
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck %s --check-prefixes=GCN,GFX950
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck %s --check-prefixes=GCN,GFX10
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s --check-prefixes=GCN,GFX11PLUS,GFX11
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX11PLUS,GFX1170
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX11PLUS,GFX12
|
|
|
|
declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %clamp)
|
|
|
|
; Baseline case: fdot2 on unmodified operands with clamp disabled (i1 false).
; Depending on the target, the checks expect either the three-source
; v_dot2_f32_f16 or a two-source accumulating form followed by a v_mov_b32.
define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX950-LABEL: v_fdot2:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v2, v0, v1
; GFX10: v_mov_b32_e32 v0, v2
;
; GFX11-LABEL: v_fdot2:
; GFX11: ; %bb.0:
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Whole-vector fneg of %a feeding fdot2 (clamp off). The checks shared by all
; RUN lines expect the negation to appear as neg_lo/neg_hi on source 0.
define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_neg_a:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
  %neg.a = fneg <2 x half> %a
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Negates only element 0 of %a (extract, fneg, re-insert) before fdot2.
; The gfx906 and gfx10 checks expect a neg_lo modifier on source 0; the
; remaining targets expect an explicit xor with 0x8000 ahead of the dot.
define float @v_fdot2_neg_a_lo(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_a_lo:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0]
;
; GFX950-LABEL: v_fdot2_neg_a_lo:
; GFX950: ; %bb.0:
; GFX950: v_xor_b32_e32 v3, 0x8000, v0
; GFX950: s_mov_b32 s0, 0xffff
; GFX950: v_bfi_b32 v0, s0, v3, v0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_neg_a_lo:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0]
;
; GFX11-LABEL: v_fdot2_neg_a_lo:
; GFX11: ; %bb.0:
; GFX11: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_neg_a_lo:
; GFX1170: ; %bb.0:
; GFX1170: v_xor_b16 v0.l, 0x8000, v0.l
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2_neg_a_lo:
; GFX12: ; %bb.0:
; GFX12: v_xor_b16 v0.l, 0x8000, v0.l
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %a_lo = extractelement <2 x half> %a, i32 0
  %neg.a_lo = fneg half %a_lo
  %neg_lo.a = insertelement <2 x half> %a, half %neg.a_lo, i32 0
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg_lo.a, <2 x half> %b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Negates only element 1 of %a (extract, fneg, re-insert) before fdot2.
; The gfx906 and gfx10 checks expect a neg_hi modifier on source 0; the
; remaining targets expect an explicit xor with 0x8000 ahead of the dot.
define float @v_fdot2_neg_a_hi(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_a_hi:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[1,0,0]
;
; GFX950-LABEL: v_fdot2_neg_a_hi:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x8000
; GFX950: v_xor_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v0, v3, v0, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_neg_a_hi:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[1,0,0]
;
; GFX11-LABEL: v_fdot2_neg_a_hi:
; GFX11: ; %bb.0:
; GFX11: v_xor_b16 v0.h, 0x8000, v0.h
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_neg_a_hi:
; GFX1170: ; %bb.0:
; GFX1170: v_xor_b16 v0.h, 0x8000, v0.h
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2_neg_a_hi:
; GFX12: ; %bb.0:
; GFX12: v_xor_b16 v0.h, 0x8000, v0.h
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %a_hi = extractelement <2 x half> %a, i32 1
  %neg.a_hi = fneg half %a_hi
  %neg_hi.a = insertelement <2 x half> %a, half %neg.a_hi, i32 1
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg_hi.a, <2 x half> %b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Whole-vector fneg of %b feeding fdot2 (clamp off). The checks shared by all
; RUN lines expect the negation to appear as neg_lo/neg_hi on source 1.
define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_neg_b:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
  %neg.b = fneg <2 x half> %b
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg.b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Negates only element 0 of %b (extract, fneg, re-insert) before fdot2.
; The gfx906 and gfx10 checks expect a neg_lo modifier on source 1; the
; remaining targets expect an explicit xor with 0x8000 ahead of the dot.
define float @v_fdot2_neg_b_lo(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_b_lo:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0]
;
; GFX950-LABEL: v_fdot2_neg_b_lo:
; GFX950: ; %bb.0:
; GFX950: v_xor_b32_e32 v3, 0x8000, v1
; GFX950: s_mov_b32 s0, 0xffff
; GFX950: v_bfi_b32 v1, s0, v3, v1
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_neg_b_lo:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0]
;
; GFX11-LABEL: v_fdot2_neg_b_lo:
; GFX11: ; %bb.0:
; GFX11: v_xor_b16 v1.l, 0x8000, v1.l
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_neg_b_lo:
; GFX1170: ; %bb.0:
; GFX1170: v_xor_b16 v1.l, 0x8000, v1.l
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2_neg_b_lo:
; GFX12: ; %bb.0:
; GFX12: v_xor_b16 v1.l, 0x8000, v1.l
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %b_lo = extractelement <2 x half> %b, i32 0
  %neg.b_lo = fneg half %b_lo
  %neg_lo.b = insertelement <2 x half> %b, half %neg.b_lo, i32 0
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg_lo.b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Negates only element 1 of %b (extract, fneg, re-insert) before fdot2.
; The gfx906 and gfx10 checks expect a neg_hi modifier on source 1; the
; remaining targets expect an explicit xor with 0x8000 ahead of the dot.
define float @v_fdot2_neg_b_hi(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_b_hi:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,1,0]
;
; GFX950-LABEL: v_fdot2_neg_b_hi:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x8000
; GFX950: v_xor_b32_sdwa v3, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v1, v3, v1, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_neg_b_hi:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,1,0]
;
; GFX11-LABEL: v_fdot2_neg_b_hi:
; GFX11: ; %bb.0:
; GFX11: v_xor_b16 v1.h, 0x8000, v1.h
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_neg_b_hi:
; GFX1170: ; %bb.0:
; GFX1170: v_xor_b16 v1.h, 0x8000, v1.h
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2_neg_b_hi:
; GFX12: ; %bb.0:
; GFX12: v_xor_b16 v1.h, 0x8000, v1.h
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %b_hi = extractelement <2 x half> %b, i32 1
  %neg.b_hi = fneg half %b_hi
  %neg_hi.b = insertelement <2 x half> %b, half %neg.b_hi, i32 1
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg_hi.b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; fneg applied to the float accumulator %c (clamp off). The checks shared by
; all RUN lines expect neg_lo:[0,0,1] on v_dot2_f32_f16.
define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_neg_c:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
  %neg.c = fneg float %c
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false)
  ret float %r
}
|
|
|
|
; llvm.fabs.f32 applied to the float accumulator %c (clamp off). The checks
; shared by all RUN lines expect neg_hi:[0,0,1] on v_dot2_f32_f16.
define float @v_fdot2_abs_c(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_abs_c:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
  %abs.c = call float @llvm.fabs.f32(float %c)
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %abs.c, i1 false)
  ret float %r
}
|
|
|
|
; Broadcasts element 1 of %a into both lanes (shuffle mask <1, 1>) before
; fdot2. The gfx906 and gfx10 checks expect op_sel:[1,0,0]; the remaining
; targets expect an explicit permute or 16-bit move ahead of the dot.
define float @v_fdot2_opsel_lo_a(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_opsel_lo_a:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[1,0,0]
;
; GFX950-LABEL: v_fdot2_opsel_lo_a:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x7060302
; GFX950: v_perm_b32 v0, v0, v0, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_opsel_lo_a:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[1,0,0]
;
; GFX11-LABEL: v_fdot2_opsel_lo_a:
; GFX11: ; %bb.0:
; GFX11: v_mov_b16_e32 v0.l, v0.h
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_opsel_lo_a:
; GFX1170: ; %bb.0:
; GFX1170: v_mov_b16_e32 v0.l, v0.h
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2_opsel_lo_a:
; GFX12: ; %bb.0:
; GFX12: v_mov_b16_e32 v0.l, v0.h
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 1>
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %shuf, <2 x half> %b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Broadcasts element 0 of %a into both lanes (shuffle mask <0, 0>) before
; fdot2. The gfx906 and gfx10 checks expect op_sel_hi:[0,1,1]; the remaining
; targets expect an explicit permute or 16-bit move ahead of the dot.
define float @v_fdot2_opsel_hi_a(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_opsel_hi_a:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[0,1,1]
;
; GFX950-LABEL: v_fdot2_opsel_hi_a:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v0, v0, v0, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_opsel_hi_a:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[0,1,1]
;
; GFX11-LABEL: v_fdot2_opsel_hi_a:
; GFX11: ; %bb.0:
; GFX11: v_mov_b16_e32 v0.h, v0.l
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_opsel_hi_a:
; GFX1170: ; %bb.0:
; GFX1170: v_mov_b16_e32 v0.h, v0.l
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2_opsel_hi_a:
; GFX12: ; %bb.0:
; GFX12: v_mov_b16_e32 v0.h, v0.l
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 0, i32 0>
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %shuf, <2 x half> %b, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Broadcasts element 1 of %b into both lanes (shuffle mask <1, 1>) before
; fdot2. The gfx906 and gfx10 checks expect op_sel:[0,1,0]; the remaining
; targets expect an explicit permute or 16-bit move ahead of the dot.
define float @v_fdot2_opsel_lo_b(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_opsel_lo_b:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[0,1,0]
;
; GFX950-LABEL: v_fdot2_opsel_lo_b:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x7060302
; GFX950: v_perm_b32 v1, v1, v1, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_opsel_lo_b:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[0,1,0]
;
; GFX11-LABEL: v_fdot2_opsel_lo_b:
; GFX11: ; %bb.0:
; GFX11: v_mov_b16_e32 v1.l, v1.h
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_opsel_lo_b:
; GFX1170: ; %bb.0:
; GFX1170: v_mov_b16_e32 v1.l, v1.h
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2_opsel_lo_b:
; GFX12: ; %bb.0:
; GFX12: v_mov_b16_e32 v1.l, v1.h
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %shuf = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> <i32 1, i32 1>
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %shuf, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Broadcasts element 0 of %b into both lanes (shuffle mask <0, 0>) before
; fdot2. The gfx906 and gfx10 checks expect op_sel_hi:[1,0,1]; the remaining
; targets expect an explicit permute or 16-bit move ahead of the dot.
define float @v_fdot2_opsel_hi_b(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_opsel_hi_b:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[1,0,1]
;
; GFX950-LABEL: v_fdot2_opsel_hi_b:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v1, v1, v1, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_opsel_hi_b:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[1,0,1]
;
; GFX11-LABEL: v_fdot2_opsel_hi_b:
; GFX11: ; %bb.0:
; GFX11: v_mov_b16_e32 v1.h, v1.l
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_opsel_hi_b:
; GFX1170: ; %bb.0:
; GFX1170: v_mov_b16_e32 v1.h, v1.l
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
;
; GFX12-LABEL: v_fdot2_opsel_hi_b:
; GFX12: ; %bb.0:
; GFX12: v_mov_b16_e32 v1.h, v1.l
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
  %shuf = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> <i32 0, i32 0>
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %shuf, float %c, i1 false)
  ret float %r
}
|
|
|
|
; Constant <2.0, 2.0> as the first fdot2 operand (clamp off). The gfx906
; checks expect the operand written as 2.0 with op_sel_hi:[0,1,1]; the other
; targets expect the 32-bit literal 0x40004000.
define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_inline_literal_a:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
;
; GFX950-LABEL: v_fdot2_inline_literal_a:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v1, 0x40004000, v0
; GFX950: v_mov_b32_e32 v0, v1
;
; GFX10-LABEL: v_fdot2_inline_literal_a:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v1, 0x40004000, v0
; GFX10: v_mov_b32_e32 v0, v1
;
; GFX11-LABEL: v_fdot2_inline_literal_a:
; GFX11: ; %bb.0:
; GFX11: v_dot2acc_f32_f16 v1, 0x40004000, v0
; GFX11: v_mov_b32_e32 v0, v1
;
; GFX1170-LABEL: v_fdot2_inline_literal_a:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, 0x40004000, v0, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_a:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, 0x40004000, v0, v1
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
  ret float %ret
}
|
|
|
|
; Constant <2.0, 2.0> as the second fdot2 operand (clamp off). The gfx906
; checks expect the operand written as 2.0 with op_sel_hi:[1,0,1]; the other
; targets expect the 32-bit literal 0x40004000.
define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
; GFX906-LABEL: v_fdot2_inline_literal_b:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
;
; GFX950-LABEL: v_fdot2_inline_literal_b:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v1, 0x40004000, v0
; GFX950: v_mov_b32_e32 v0, v1
;
; GFX10-LABEL: v_fdot2_inline_literal_b:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v1, 0x40004000, v0
; GFX10: v_mov_b32_e32 v0, v1
;
; GFX11-LABEL: v_fdot2_inline_literal_b:
; GFX11: ; %bb.0:
; GFX11: v_dot2acc_f32_f16 v1, 0x40004000, v0
; GFX11: v_mov_b32_e32 v0, v1
;
; GFX1170-LABEL: v_fdot2_inline_literal_b:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, 0x40004000, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_b:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, 0x40004000, v1
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
  ret float %ret
}
|
|
|
|
; Constant 2.0 as the float accumulator of fdot2 (clamp off). Targets whose
; checks use the three-source v_dot2_f32_f16 take 2.0 directly; the ones using
; an accumulating form expect 2.0 to be moved into a VGPR first.
define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
; GFX906-LABEL: v_fdot2_inline_literal_c:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, 2.0
;
; GFX950-LABEL: v_fdot2_inline_literal_c:
; GFX950: ; %bb.0:
; GFX950: v_mov_b32_e32 v2, 2.0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_mov_b32_e32 v0, v2
;
; GFX10-LABEL: v_fdot2_inline_literal_c:
; GFX10: ; %bb.0:
; GFX10: v_mov_b32_e32 v2, 2.0
; GFX10: v_dot2c_f32_f16 v2, v0, v1
; GFX10: v_mov_b32_e32 v0, v2
;
; GFX11-LABEL: v_fdot2_inline_literal_c:
; GFX11: ; %bb.0:
; GFX11: v_mov_b32_e32 v2, 2.0
; GFX11: v_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_mov_b32_e32 v0, v2
;
; GFX1170-LABEL: v_fdot2_inline_literal_c:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, 2.0
;
; GFX12-LABEL: v_fdot2_inline_literal_c:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, 2.0
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 2.0, i1 false)
  ret float %ret
}
|
|
|
|
; fdot2 on unmodified operands with clamp enabled (i1 true). The checks shared
; by all RUN lines expect v_dot2_f32_f16 carrying the clamp modifier.
define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_clamp:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 clamp
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
  ret float %r
}
|
|
|
|
; Whole-vector fneg of %a with clamp enabled. The checks shared by all RUN
; lines expect neg_lo/neg_hi on source 0 together with clamp.
define float @v_fdot2_neg_a_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_neg_a_clamp:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] clamp
  %neg.a = fneg <2 x half> %a
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %b, float %c, i1 true)
  ret float %r
}
|
|
|
|
; Whole-vector fneg of %b with clamp enabled. The checks shared by all RUN
; lines expect neg_lo/neg_hi on source 1 together with clamp.
define float @v_fdot2_neg_b_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_neg_b_clamp:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0] clamp
  %neg.b = fneg <2 x half> %b
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg.b, float %c, i1 true)
  ret float %r
}
|
|
|
|
; fneg of the float accumulator %c with clamp enabled. The checks shared by
; all RUN lines expect neg_lo:[0,0,1] together with clamp.
define float @v_fdot2_neg_c_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_neg_c_clamp:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] clamp
  %neg.c = fneg float %c
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 true)
  ret float %r
}
|
|
|
|
; llvm.fabs.f32 of the float accumulator %c with clamp enabled. The checks
; shared by all RUN lines expect neg_hi:[0,0,1] together with clamp.
define float @v_fdot2_abs_c_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GCN-LABEL: v_fdot2_abs_c_clamp:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1] clamp
  %abs.c = call float @llvm.fabs.f32(float %c)
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %abs.c, i1 true)
  ret float %r
}
|
|
|
|
; Broadcast of element 1 of %a (shuffle mask <1, 1>) with clamp enabled.
; Every target's checks expect v_dot2_f32_f16 with clamp; only gfx906 and
; gfx10 expect the broadcast expressed as op_sel:[1,0,0].
define float @v_fdot2_opsel_lo_a_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_opsel_lo_a_clamp:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[1,0,0] clamp
;
; GFX950-LABEL: v_fdot2_opsel_lo_a_clamp:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x7060302
; GFX950: v_perm_b32 v0, v0, v0, s0
; GFX950: v_dot2_f32_f16 v0, v0, v1, v2 clamp
;
; GFX10-LABEL: v_fdot2_opsel_lo_a_clamp:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[1,0,0] clamp
;
; GFX11PLUS-LABEL: v_fdot2_opsel_lo_a_clamp:
; GFX11PLUS: ; %bb.0:
; GFX11PLUS: v_mov_b16_e32 v0.l, v0.h
; GFX11PLUS: v_dot2_f32_f16 v0, v0, v1, v2 clamp
  %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 1>
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %shuf, <2 x half> %b, float %c, i1 true)
  ret float %r
}
|
|
|
|
; Broadcast of element 0 of %a (shuffle mask <0, 0>) with clamp enabled.
; Every target's checks expect v_dot2_f32_f16 with clamp; only gfx906 and
; gfx10 expect the broadcast expressed as op_sel_hi:[0,1,1].
define float @v_fdot2_opsel_hi_a_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_opsel_hi_a_clamp:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[0,1,1] clamp
;
; GFX950-LABEL: v_fdot2_opsel_hi_a_clamp:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v0, v0, v0, s0
; GFX950: v_dot2_f32_f16 v0, v0, v1, v2 clamp
;
; GFX10-LABEL: v_fdot2_opsel_hi_a_clamp:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[0,1,1] clamp
;
; GFX11PLUS-LABEL: v_fdot2_opsel_hi_a_clamp:
; GFX11PLUS: ; %bb.0:
; GFX11PLUS: v_mov_b16_e32 v0.h, v0.l
; GFX11PLUS: v_dot2_f32_f16 v0, v0, v1, v2 clamp
  %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 0, i32 0>
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %shuf, <2 x half> %b, float %c, i1 true)
  ret float %r
}
|
|
|
|
; Broadcast of element 1 of %b (shuffle mask <1, 1>) with clamp enabled.
; Every target's checks expect v_dot2_f32_f16 with clamp; only gfx906 and
; gfx10 expect the broadcast expressed as op_sel:[0,1,0].
define float @v_fdot2_opsel_lo_b_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_opsel_lo_b_clamp:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[0,1,0] clamp
;
; GFX950-LABEL: v_fdot2_opsel_lo_b_clamp:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x7060302
; GFX950: v_perm_b32 v1, v1, v1, s0
; GFX950: v_dot2_f32_f16 v0, v0, v1, v2 clamp
;
; GFX10-LABEL: v_fdot2_opsel_lo_b_clamp:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[0,1,0] clamp
;
; GFX11PLUS-LABEL: v_fdot2_opsel_lo_b_clamp:
; GFX11PLUS: ; %bb.0:
; GFX11PLUS: v_mov_b16_e32 v1.l, v1.h
; GFX11PLUS: v_dot2_f32_f16 v0, v0, v1, v2 clamp
  %shuf = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> <i32 1, i32 1>
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %shuf, float %c, i1 true)
  ret float %r
}
|
|
|
|
; Broadcast of element 0 of %b (shuffle mask <0, 0>) with clamp enabled.
; Every target's checks expect v_dot2_f32_f16 with clamp; only gfx906 and
; gfx10 expect the broadcast expressed as op_sel_hi:[1,0,1].
define float @v_fdot2_opsel_hi_b_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_opsel_hi_b_clamp:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[1,0,1] clamp
;
; GFX950-LABEL: v_fdot2_opsel_hi_b_clamp:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v1, v1, v1, s0
; GFX950: v_dot2_f32_f16 v0, v0, v1, v2 clamp
;
; GFX10-LABEL: v_fdot2_opsel_hi_b_clamp:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[1,0,1] clamp
;
; GFX11PLUS-LABEL: v_fdot2_opsel_hi_b_clamp:
; GFX11PLUS: ; %bb.0:
; GFX11PLUS: v_mov_b16_e32 v1.h, v1.l
; GFX11PLUS: v_dot2_f32_f16 v0, v0, v1, v2 clamp
  %shuf = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> <i32 0, i32 0>
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %shuf, float %c, i1 true)
  ret float %r
}
|
|
|
|
; Constant <2.0, 2.0> as the first operand with clamp enabled. The gfx906 and
; gfx10 checks expect 2.0 with op_sel_hi:[0,1,1]; gfx950 expects the constant
; materialized in an SGPR; gfx11+ expects the literal 0x40004000.
define float @v_fdot2_inline_literal_a_clamp(<2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_inline_literal_a_clamp:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1] clamp
;
; GFX950-LABEL: v_fdot2_inline_literal_a_clamp:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x40004000
; GFX950: v_dot2_f32_f16 v0, s0, v0, v1 clamp
;
; GFX10-LABEL: v_fdot2_inline_literal_a_clamp:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1] clamp
;
; GFX11PLUS-LABEL: v_fdot2_inline_literal_a_clamp:
; GFX11PLUS: ; %bb.0:
; GFX11PLUS: v_dot2_f32_f16 v0, 0x40004000, v0, v1 clamp
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 true)
  ret float %ret
}
|
|
|
|
; Constant <2.0, 2.0> as the second operand with clamp enabled. The gfx906 and
; gfx10 checks expect 2.0 with op_sel_hi:[1,0,1]; gfx950 expects the constant
; materialized in an SGPR; gfx11+ expects the literal 0x40004000.
define float @v_fdot2_inline_literal_b_clamp(<2 x half> %a, float %c) {
; GFX906-LABEL: v_fdot2_inline_literal_b_clamp:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1] clamp
;
; GFX950-LABEL: v_fdot2_inline_literal_b_clamp:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x40004000
; GFX950: v_dot2_f32_f16 v0, v0, s0, v1 clamp
;
; GFX10-LABEL: v_fdot2_inline_literal_b_clamp:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1] clamp
;
; GFX11PLUS-LABEL: v_fdot2_inline_literal_b_clamp:
; GFX11PLUS: ; %bb.0:
; GFX11PLUS: v_dot2_f32_f16 v0, v0, 0x40004000, v1 clamp
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 true)
  ret float %ret
}
|
|
|
|
; Constant 2.0 as the float accumulator with clamp enabled. The checks shared
; by all RUN lines expect v_dot2_f32_f16 taking 2.0 directly as source 2.
define float @v_fdot2_inline_literal_c_clamp(<2 x half> %a, <2 x half> %b) {
; GCN-LABEL: v_fdot2_inline_literal_c_clamp:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, 2.0 clamp
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 2.0, i1 true)
  ret float %ret
}
|
|
|
|
; Two independent fdot2 calls (clamp off) whose results are summed with fadd.
; The gfx11 checks expect both dots packed into one v_dual_dot2acc_f32_f16
; pair; every other target expects two separate dot instructions.
define float @v_fdot2_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_dual:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v2, v0, v1
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v2, v5
;
; GFX11-LABEL: v_fdot2_dual:
; GFX11: ; %bb.0:
; GFX11: v_dual_dot2acc_f32_f16 v2, v0, v1 :: v_dual_dot2acc_f32_f16 v5, v3, v4
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_dual:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Two fdot2 calls summed with fadd, where the first takes a whole-vector fneg
; of %a. Every target's checks expect the first dot as v_dot2_f32_f16 with
; neg_lo/neg_hi; the gfx11 checks expect no v_dual pairing in this case.
define float @v_fdot2_neg_a_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_neg_a_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_neg_a_dual:
; GFX950: ; %bb.0:
; GFX950: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v0, v5
;
; GFX10-LABEL: v_fdot2_neg_a_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_neg_a_dual:
; GFX11: ; %bb.0:
; GFX11: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX11: v_dot2acc_f32_f16 v5, v3, v4
; GFX11: v_add_f32_e32 v0, v0, v5
;
; GFX1170-LABEL: v_fdot2_neg_a_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_neg_a_dual:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %neg.a = fneg <2 x half> %a
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Two fdot2 calls summed with fadd, where the first takes %a with only
; element 0 negated. The gfx906 and gfx10 checks expect a neg_lo modifier;
; the gfx11 checks expect a v_xor_b16 followed by a v_dual_dot2acc pair.
define float @v_fdot2_neg_a_lo_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_neg_a_lo_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_neg_a_lo_dual:
; GFX950: ; %bb.0:
; GFX950: v_xor_b32_e32 v6, 0x8000, v0
; GFX950: s_mov_b32 s0, 0xffff
; GFX950: v_bfi_b32 v0, s0, v6, v0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_neg_a_lo_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_neg_a_lo_dual:
; GFX11: ; %bb.0:
; GFX11: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11: v_dual_dot2acc_f32_f16 v5, v3, v4 :: v_dual_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_neg_a_lo_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_xor_b16 v0.l, 0x8000, v0.l
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_neg_a_lo_dual:
; GFX12: ; %bb.0:
; GFX12: v_xor_b16 v0.l, 0x8000, v0.l
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %a_lo = extractelement <2 x half> %a, i32 0
  %neg.a_lo = fneg half %a_lo
  %neg_lo.a = insertelement <2 x half> %a, half %neg.a_lo, i32 0
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %neg_lo.a, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Two fdot2 calls summed with fadd, where the first takes %a with only
; element 1 negated. The gfx906 and gfx10 checks expect a neg_hi modifier;
; the gfx11 checks expect a v_xor_b16 followed by a v_dual_dot2acc pair.
define float @v_fdot2_neg_a_hi_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_neg_a_hi_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[1,0,0]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_neg_a_hi_dual:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x8000
; GFX950: v_xor_b32_sdwa v6, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v0, v6, v0, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_neg_a_hi_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[1,0,0]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_neg_a_hi_dual:
; GFX11: ; %bb.0:
; GFX11: v_xor_b16 v0.h, 0x8000, v0.h
; GFX11: v_dual_dot2acc_f32_f16 v5, v3, v4 :: v_dual_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_neg_a_hi_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_xor_b16 v0.h, 0x8000, v0.h
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_neg_a_hi_dual:
; GFX12: ; %bb.0:
; GFX12: v_xor_b16 v0.h, 0x8000, v0.h
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %a_hi = extractelement <2 x half> %a, i32 1
  %neg.a_hi = fneg half %a_hi
  %neg_hi.a = insertelement <2 x half> %a, half %neg.a_hi, i32 1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %neg_hi.a, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Two fdot2 calls summed with fadd, where the first takes a whole-vector fneg
; of %b. Every target's checks expect the first dot as v_dot2_f32_f16 with
; neg_lo/neg_hi on source 1; gfx11 expects no v_dual pairing in this case.
define float @v_fdot2_neg_b_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_neg_b_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_neg_b_dual:
; GFX950: ; %bb.0:
; GFX950: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v0, v5
;
; GFX10-LABEL: v_fdot2_neg_b_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_neg_b_dual:
; GFX11: ; %bb.0:
; GFX11: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX11: v_dot2acc_f32_f16 v5, v3, v4
; GFX11: v_add_f32_e32 v0, v0, v5
;
; GFX1170-LABEL: v_fdot2_neg_b_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_neg_b_dual:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %neg.b = fneg <2 x half> %b
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg.b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Only element 0 (the low half) of %b is negated before the first fdot2:
; the element is extracted, fneg'ed and inserted back. The second fdot2 is
; unmodified and the two results are added. The gfx1100 checks show the
; negation as a separate v_xor_b16 followed by a v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_neg_b_lo_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_neg_b_lo_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_neg_b_lo_dual:
; GFX950: ; %bb.0:
; GFX950: v_xor_b32_e32 v6, 0x8000, v1
; GFX950: s_mov_b32 s0, 0xffff
; GFX950: v_bfi_b32 v1, s0, v6, v1
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_neg_b_lo_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_neg_b_lo_dual:
; GFX11: ; %bb.0:
; GFX11: v_xor_b16 v1.l, 0x8000, v1.l
; GFX11: v_dual_dot2acc_f32_f16 v5, v3, v4 :: v_dual_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_neg_b_lo_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_xor_b16 v1.l, 0x8000, v1.l
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_neg_b_lo_dual:
; GFX12: ; %bb.0:
; GFX12: v_xor_b16 v1.l, 0x8000, v1.l
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %b_lo = extractelement <2 x half> %b, i32 0
  %neg.b_lo = fneg half %b_lo
  %neg_lo.b = insertelement <2 x half> %b, half %neg.b_lo, i32 0
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg_lo.b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Only element 1 (the high half) of %b is negated before the first fdot2:
; the element is extracted, fneg'ed and inserted back. The second fdot2 is
; unmodified and the two results are added. The gfx1100 checks show the
; negation as a separate v_xor_b16 followed by a v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_neg_b_hi_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_neg_b_hi_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,1,0]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_neg_b_hi_dual:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x8000
; GFX950: v_xor_b32_sdwa v6, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v1, v6, v1, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_neg_b_hi_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,1,0]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_neg_b_hi_dual:
; GFX11: ; %bb.0:
; GFX11: v_xor_b16 v1.h, 0x8000, v1.h
; GFX11: v_dual_dot2acc_f32_f16 v5, v3, v4 :: v_dual_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_neg_b_hi_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_xor_b16 v1.h, 0x8000, v1.h
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_neg_b_hi_dual:
; GFX12: ; %bb.0:
; GFX12: v_xor_b16 v1.h, 0x8000, v1.h
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %b_hi = extractelement <2 x half> %b, i32 1
  %neg.b_hi = fneg half %b_hi
  %neg_hi.b = insertelement <2 x half> %b, half %neg.b_hi, i32 1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg_hi.b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; The float accumulator %c of the first fdot2 is negated with fneg; the
; second fdot2 is unmodified and the two results are added. Every run line's
; checks show the negation folded into neg_lo:[0,0,1] on v_dot2_f32_f16.
define float @v_fdot2_neg_c_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_neg_c_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_neg_c_dual:
; GFX950: ; %bb.0:
; GFX950: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v0, v5
;
; GFX10-LABEL: v_fdot2_neg_c_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_neg_c_dual:
; GFX11: ; %bb.0:
; GFX11: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
; GFX11: v_dot2acc_f32_f16 v5, v3, v4
; GFX11: v_add_f32_e32 v0, v0, v5
;
; GFX1170-LABEL: v_fdot2_neg_c_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_neg_c_dual:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1]
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %neg.c = fneg float %c
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; The float accumulator %c of the first fdot2 is passed through llvm.fabs.f32;
; the second fdot2 is unmodified and the two results are added. Every run
; line's checks show the fabs folded into neg_hi:[0,0,1] on v_dot2_f32_f16.
define float @v_fdot2_abs_c_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_abs_c_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_abs_c_dual:
; GFX950: ; %bb.0:
; GFX950: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v0, v5
;
; GFX10-LABEL: v_fdot2_abs_c_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_abs_c_dual:
; GFX11: ; %bb.0:
; GFX11: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
; GFX11: v_dot2acc_f32_f16 v5, v3, v4
; GFX11: v_add_f32_e32 v0, v0, v5
;
; GFX1170-LABEL: v_fdot2_abs_c_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_abs_c_dual:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2 neg_hi:[0,0,1]
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %abs.c = call float @llvm.fabs.f32(float %c)
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %abs.c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Both lanes of the first fdot2's first operand come from element 1 of %a
; (shufflevector mask <1, 1>). The second fdot2 is unmodified and the two
; results are added. The gfx906 and gfx1012 checks show this as
; op_sel:[1,0,0]; the gfx1100 checks show a v_mov_b16 followed by a
; v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_opsel_lo_a_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_opsel_lo_a_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[1,0,0]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_opsel_lo_a_dual:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x7060302
; GFX950: v_perm_b32 v0, v0, v0, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_opsel_lo_a_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[1,0,0]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_opsel_lo_a_dual:
; GFX11: ; %bb.0:
; GFX11: v_mov_b16_e32 v0.l, v0.h
; GFX11: v_dual_dot2acc_f32_f16 v5, v3, v4 :: v_dual_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_opsel_lo_a_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_mov_b16_e32 v0.l, v0.h
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_opsel_lo_a_dual:
; GFX12: ; %bb.0:
; GFX12: v_mov_b16_e32 v0.l, v0.h
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 1>
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %shuf, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Both lanes of the first fdot2's first operand come from element 0 of %a
; (shufflevector mask <0, 0>). The second fdot2 is unmodified and the two
; results are added. The gfx906 and gfx1012 checks show this as
; op_sel_hi:[0,1,1]; the gfx1100 checks show a v_mov_b16 followed by a
; v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_opsel_hi_a_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_opsel_hi_a_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[0,1,1]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_opsel_hi_a_dual:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v0, v0, v0, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_opsel_hi_a_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[0,1,1]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_opsel_hi_a_dual:
; GFX11: ; %bb.0:
; GFX11: v_mov_b16_e32 v0.h, v0.l
; GFX11: v_dual_dot2acc_f32_f16 v5, v3, v4 :: v_dual_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_opsel_hi_a_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_mov_b16_e32 v0.h, v0.l
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_opsel_hi_a_dual:
; GFX12: ; %bb.0:
; GFX12: v_mov_b16_e32 v0.h, v0.l
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 0, i32 0>
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %shuf, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Both lanes of the first fdot2's second operand come from element 1 of %b
; (shufflevector mask <1, 1>). The second fdot2 is unmodified and the two
; results are added. The gfx906 and gfx1012 checks show this as
; op_sel:[0,1,0]; the gfx1100 checks show a v_mov_b16 followed by a
; v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_opsel_lo_b_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_opsel_lo_b_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[0,1,0]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_opsel_lo_b_dual:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x7060302
; GFX950: v_perm_b32 v1, v1, v1, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_opsel_lo_b_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel:[0,1,0]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_opsel_lo_b_dual:
; GFX11: ; %bb.0:
; GFX11: v_mov_b16_e32 v1.l, v1.h
; GFX11: v_dual_dot2acc_f32_f16 v5, v3, v4 :: v_dual_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_opsel_lo_b_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_mov_b16_e32 v1.l, v1.h
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_opsel_lo_b_dual:
; GFX12: ; %bb.0:
; GFX12: v_mov_b16_e32 v1.l, v1.h
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %shuf = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> <i32 1, i32 1>
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %shuf, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Both lanes of the first fdot2's second operand come from element 0 of %b
; (shufflevector mask <0, 0>). The second fdot2 is unmodified and the two
; results are added. The gfx906 and gfx1012 checks show this as
; op_sel_hi:[1,0,1]; the gfx1100 checks show a v_mov_b16 followed by a
; v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_opsel_hi_b_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_opsel_hi_b_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[1,0,1]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_opsel_hi_b_dual:
; GFX950: ; %bb.0:
; GFX950: s_mov_b32 s0, 0x5040100
; GFX950: v_perm_b32 v1, v1, v1, s0
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_opsel_hi_b_dual:
; GFX10: ; %bb.0:
; GFX10: v_dot2_f32_f16 v0, v0, v1, v2 op_sel_hi:[1,0,1]
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v0, v5
;
; GFX11-LABEL: v_fdot2_opsel_hi_b_dual:
; GFX11: ; %bb.0:
; GFX11: v_mov_b16_e32 v1.h, v1.l
; GFX11: v_dual_dot2acc_f32_f16 v5, v3, v4 :: v_dual_dot2acc_f32_f16 v2, v0, v1
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_opsel_hi_b_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_mov_b16_e32 v1.h, v1.l
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_opsel_hi_b_dual:
; GFX12: ; %bb.0:
; GFX12: v_mov_b16_e32 v1.h, v1.l
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %shuf = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> <i32 0, i32 0>
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %shuf, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Literal tests for VOPD. In some of these test functions the operands of a
; call are deliberately swapped to avoid bank conflicts: when the literal is in
; X's src0, %r1 uses %d, %e; when the literal is in X's src1, %r1 uses %e, %d.
|
|
|
|
; The first fdot2 (%r0) takes the constant <2.0, 2.0> as its first vector
; operand; the second fdot2 (%r1) uses only function arguments. %a is unused.
; The gfx1100 checks show the constant as the literal 0x40004000 in the first
; half of a v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_inline_literal_a_x(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_inline_literal_a_x:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, 2.0, v1, v2 op_sel_hi:[0,1,1]
; GFX906: v_dot2_f32_f16 v1, v3, v4, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_inline_literal_a_x:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v2, 0x40004000, v1
; GFX950: v_dot2c_f32_f16_e32 v5, v3, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_inline_literal_a_x:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v2, 0x40004000, v1
; GFX10: v_dot2c_f32_f16 v5, v3, v4
; GFX10: v_add_f32_e32 v0, v2, v5
;
; GFX11-LABEL: v_fdot2_inline_literal_a_x:
; GFX11: ; %bb.0:
; GFX11: v_dual_dot2acc_f32_f16 v2, 0x40004000, v1 :: v_dual_dot2acc_f32_f16 v5, v3, v4
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_inline_literal_a_x:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, 0x40004000, v1, v2
; GFX1170: v_dot2_f32_f16 v1, v3, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_a_x:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, 0x40004000, v1, v2
; GFX12: v_dot2_f32_f16 v1, v3, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; The second fdot2 (%r1) takes the constant <2.0, 2.0> as its first vector
; operand; the first fdot2 (%r0) uses only function arguments. %d is unused.
; The gfx1100 checks show the constant as the literal 0x40004000 in the second
; half of a v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_inline_literal_a_y(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_inline_literal_a_y:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, v2
; GFX906: v_dot2_f32_f16 v1, 2.0, v4, v5 op_sel_hi:[0,1,1]
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_inline_literal_a_y:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v2, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v5, 0x40004000, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_inline_literal_a_y:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v2, v0, v1
; GFX10: v_dot2c_f32_f16 v5, 0x40004000, v4
; GFX10: v_add_f32_e32 v0, v2, v5
;
; GFX11-LABEL: v_fdot2_inline_literal_a_y:
; GFX11: ; %bb.0:
; GFX11: v_dual_dot2acc_f32_f16 v2, v0, v1 :: v_dual_dot2acc_f32_f16 v5, 0x40004000, v4
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_inline_literal_a_y:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, v2
; GFX1170: v_dot2_f32_f16 v1, 0x40004000, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_a_y:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, v2
; GFX12: v_dot2_f32_f16 v1, 0x40004000, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Both fdot2 calls take the constant <2.0, 2.0> as their first vector operand.
; %a and %d are unused. The gfx1100 checks show the same literal 0x40004000 in
; both halves of a v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_inline_literal_a_xy(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_inline_literal_a_xy:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, 2.0, v1, v2 op_sel_hi:[0,1,1]
; GFX906: v_dot2_f32_f16 v1, 2.0, v4, v5 op_sel_hi:[0,1,1]
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_inline_literal_a_xy:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v2, 0x40004000, v1
; GFX950: v_dot2c_f32_f16_e32 v5, 0x40004000, v4
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_inline_literal_a_xy:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v2, 0x40004000, v1
; GFX10: v_dot2c_f32_f16 v5, 0x40004000, v4
; GFX10: v_add_f32_e32 v0, v2, v5
;
; GFX11-LABEL: v_fdot2_inline_literal_a_xy:
; GFX11: ; %bb.0:
; GFX11: v_dual_dot2acc_f32_f16 v2, 0x40004000, v1 :: v_dual_dot2acc_f32_f16 v5, 0x40004000, v4
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_inline_literal_a_xy:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, 0x40004000, v1, v2
; GFX1170: v_dot2_f32_f16 v1, 0x40004000, v4, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_a_xy:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, 0x40004000, v1, v2
; GFX12: v_dot2_f32_f16 v1, 0x40004000, v4, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; The first fdot2 (%r0) takes the constant <2.0, 2.0> as its second vector
; operand. The second fdot2 (%r1) is called with its vector operands in
; swapped order (%e, %d). %b is unused. The gfx1100 checks show the literal
; 0x40004000 moved to the first source of the first half of a
; v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_inline_literal_b_x(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_inline_literal_b_x:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, 2.0, v2 op_sel_hi:[1,0,1]
; GFX906: v_dot2_f32_f16 v1, v4, v3, v5
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_inline_literal_b_x:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v2, 0x40004000, v0
; GFX950: v_dot2c_f32_f16_e32 v5, v4, v3
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_inline_literal_b_x:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v2, 0x40004000, v0
; GFX10: v_dot2c_f32_f16 v5, v4, v3
; GFX10: v_add_f32_e32 v0, v2, v5
;
; GFX11-LABEL: v_fdot2_inline_literal_b_x:
; GFX11: ; %bb.0:
; GFX11: v_dual_dot2acc_f32_f16 v2, 0x40004000, v0 :: v_dual_dot2acc_f32_f16 v5, v4, v3
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_inline_literal_b_x:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, 0x40004000, v2
; GFX1170: v_dot2_f32_f16 v1, v4, v3, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_b_x:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, 0x40004000, v2
; GFX12: v_dot2_f32_f16 v1, v4, v3, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %e, <2 x half> %d, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; The second fdot2 (%r1) takes the constant <2.0, 2.0> as its second vector
; operand. The first fdot2 (%r0) is called with its vector operands in
; swapped order (%b, %a). %e is unused. The gfx1100 checks show the literal
; 0x40004000 moved to the first source of the second half of a
; v_dual_dot2acc_f32_f16 pair.
define float @v_fdot2_inline_literal_b_y(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_inline_literal_b_y:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v1, v0, v2
; GFX906: v_dot2_f32_f16 v1, v3, 2.0, v5 op_sel_hi:[1,0,1]
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_inline_literal_b_y:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v2, v1, v0
; GFX950: v_dot2c_f32_f16_e32 v5, 0x40004000, v3
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_inline_literal_b_y:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v2, v1, v0
; GFX10: v_dot2c_f32_f16 v5, 0x40004000, v3
; GFX10: v_add_f32_e32 v0, v2, v5
;
; GFX11-LABEL: v_fdot2_inline_literal_b_y:
; GFX11: ; %bb.0:
; GFX11: v_dual_dot2acc_f32_f16 v2, v1, v0 :: v_dual_dot2acc_f32_f16 v5, 0x40004000, v3
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_inline_literal_b_y:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v1, v0, v2
; GFX1170: v_dot2_f32_f16 v1, v3, 0x40004000, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_b_y:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v1, v0, v2
; GFX12: v_dot2_f32_f16 v1, v3, 0x40004000, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %b, <2 x half> %a, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> <half 2.0, half 2.0>, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Both fdot2 calls take the constant <2.0, 2.0> as their second vector
; operand. %b and %e are unused. The gfx1100 checks show the same literal
; 0x40004000 as the first source in both halves of a v_dual_dot2acc_f32_f16
; pair.
define float @v_fdot2_inline_literal_b_xy(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_inline_literal_b_xy:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, 2.0, v2 op_sel_hi:[1,0,1]
; GFX906: v_dot2_f32_f16 v1, v3, 2.0, v5 op_sel_hi:[1,0,1]
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_inline_literal_b_xy:
; GFX950: ; %bb.0:
; GFX950: v_dot2c_f32_f16_e32 v2, 0x40004000, v0
; GFX950: v_dot2c_f32_f16_e32 v5, 0x40004000, v3
; GFX950: v_add_f32_e32 v0, v2, v5
;
; GFX10-LABEL: v_fdot2_inline_literal_b_xy:
; GFX10: ; %bb.0:
; GFX10: v_dot2c_f32_f16 v2, 0x40004000, v0
; GFX10: v_dot2c_f32_f16 v5, 0x40004000, v3
; GFX10: v_add_f32_e32 v0, v2, v5
;
; GFX11-LABEL: v_fdot2_inline_literal_b_xy:
; GFX11: ; %bb.0:
; GFX11: v_dual_dot2acc_f32_f16 v2, 0x40004000, v0 :: v_dual_dot2acc_f32_f16 v5, 0x40004000, v3
; GFX11: v_add_f32_e32 v0, v2, v5
;
; GFX1170-LABEL: v_fdot2_inline_literal_b_xy:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, 0x40004000, v2
; GFX1170: v_dot2_f32_f16 v1, v3, 0x40004000, v5
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_b_xy:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, 0x40004000, v2
; GFX12: v_dot2_f32_f16 v1, v3, 0x40004000, v5
; GFX12: v_add_f32_e32 v0, v0, v1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> <half 2.0, half 2.0>, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; The first fdot2 (%r0) uses the constant 2.0 as its float accumulator, so
; this function has no %c parameter; the second fdot2 (%r1) accumulates into
; %f. The gfx1100 checks show the constant materialised by a v_dual_mov_b32
; that is paired with the second dot product.
; NOTE(review): %r0 is a `tail call` while every sibling test uses a plain
; `call`; presumably this makes no difference for an intrinsic -- confirm
; before normalising.
define float @v_fdot2_inline_literal_c_dual(<2 x half> %a, <2 x half> %b, <2 x half> %d, <2 x half> %e, float %f) {
; GFX906-LABEL: v_fdot2_inline_literal_c_dual:
; GFX906: ; %bb.0:
; GFX906: v_dot2_f32_f16 v0, v0, v1, 2.0
; GFX906: v_dot2_f32_f16 v1, v2, v3, v4
; GFX906: v_add_f32_e32 v0, v0, v1
;
; GFX950-LABEL: v_fdot2_inline_literal_c_dual:
; GFX950: ; %bb.0:
; GFX950: v_mov_b32_e32 v5, 2.0
; GFX950: v_dot2c_f32_f16_e32 v5, v0, v1
; GFX950: v_dot2c_f32_f16_e32 v4, v2, v3
; GFX950: v_add_f32_e32 v0, v5, v4
;
; GFX10-LABEL: v_fdot2_inline_literal_c_dual:
; GFX10: ; %bb.0:
; GFX10: v_mov_b32_e32 v5, 2.0
; GFX10: v_dot2c_f32_f16 v4, v2, v3
; GFX10: v_dot2c_f32_f16 v5, v0, v1
; GFX10: v_add_f32_e32 v0, v5, v4
;
; GFX11-LABEL: v_fdot2_inline_literal_c_dual:
; GFX11: ; %bb.0:
; GFX11: v_dual_mov_b32 v5, 2.0 :: v_dual_dot2acc_f32_f16 v4, v2, v3
; GFX11: v_dot2acc_f32_f16 v5, v0, v1
; GFX11: v_add_f32_e32 v0, v5, v4
;
; GFX1170-LABEL: v_fdot2_inline_literal_c_dual:
; GFX1170: ; %bb.0:
; GFX1170: v_dot2_f32_f16 v0, v0, v1, 2.0
; GFX1170: v_dot2_f32_f16 v1, v2, v3, v4
; GFX1170: v_add_f32_e32 v0, v0, v1
;
; GFX12-LABEL: v_fdot2_inline_literal_c_dual:
; GFX12: ; %bb.0:
; GFX12: v_dot2_f32_f16 v0, v0, v1, 2.0
; GFX12: v_dot2_f32_f16 v1, v2, v3, v4
; GFX12: v_add_f32_e32 v0, v0, v1
  %r0 = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 2.0, i1 false)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 false)
  %r = fadd float %r0, %r1
  ret float %r
}
|
|
|
|
; Both fdot2 calls pass i1 true as the clamp argument and their results are
; added. The checks use the prefix shared by all run lines and expect two
; separate v_dot2_f32_f16 instructions carrying the clamp modifier.
define float @v_fdot2_clamp_dual(<2 x half> %a, <2 x half> %b, float %c, <2 x half> %d, <2 x half> %e, float %f) {
; GCN-LABEL: v_fdot2_clamp_dual:
; GCN: ; %bb.0:
; GCN: v_dot2_f32_f16 v0, v0, v1, v2 clamp
; GCN: v_dot2_f32_f16 v1, v3, v4, v5 clamp
; GCN: v_add_f32_e32 v0, v0, v1
  %r0 = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
  %r1 = call float @llvm.amdgcn.fdot2(<2 x half> %d, <2 x half> %e, float %f, i1 true)
  %r = fadd float %r0, %r1
  ret float %r
}
|