
In some cases this will require an avoidable re-defining of a register, but it works out better most of the time. Also allow folding 64-bit immediates into subregister extracts, unless it would break an inline constant. We could be more aggressive here, but this set of conditions seems to do a reasonable job without introducing too many regressions.
4433 lines
214 KiB
LLVM
4433 lines
214 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
|
|
; f32 fmul whose multiplier is a select between 2.0 (compare true) and 1.0.
; The expected output on every target keeps the v_mul_f32 and picks the
; multiplier with a v_cndmask_b32 that uses both values as inline constants.
define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f32_test1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f32_test1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f32_test1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f32_test1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float 2.000000e+00, float 1.000000e+00
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between 0.5 (compare true) and 1.0.
; The expected output on every target keeps the v_mul_f32 and picks the
; multiplier with a v_cndmask_b32 that uses both values as inline constants.
define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f32_test2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f32_test2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f32_test2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f32_test2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float 5.000000e-01, float 1.000000e+00
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; <2 x float> variant of the 2.0 / 1.0 select feeding an fmul. The expected
; output handles each lane separately: one v_cmp + v_cndmask_b32 pair per
; lane, followed by two v_mul_f32 (a single v_dual_mul_f32 on GFX11). SDAG and
; GlobalISel differ only in lane order on GFX7/GFX9.
define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_v2f32_test3:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v4
; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
; GFX7-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX7-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_v2f32_test3:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v4
; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_v2f32_test3:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v4
; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
; GFX9-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_v2f32_test3:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v4
; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
; GFX9-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_v2f32_test3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX10-NEXT:    v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v3, v5
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_v2f32_test3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v3, v5
; GFX11-NEXT:    v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
  %y = select <2 x i1> %bool, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
  %ldexp = fmul <2 x float> %x, %y
  ret <2 x float> %ldexp
}
|
|
|
|
; <2 x float> variant of the 0.5 / 1.0 select feeding an fmul. The expected
; output handles each lane separately: one v_cmp + v_cndmask_b32 pair per
; lane, followed by two v_mul_f32 (a single v_dual_mul_f32 on GFX11). SDAG and
; GlobalISel differ only in lane order on GFX7/GFX9.
define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_v2f32_test4:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v4
; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
; GFX7-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX7-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_v2f32_test4:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v4
; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_v2f32_test4:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v4
; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
; GFX9-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_v2f32_test4:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v4
; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
; GFX9-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_v2f32_test4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX10-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v3, v5
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_v2f32_test4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v3, v5
; GFX11-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
  %y = select <2 x i1> %bool, <2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
  %ldexp = fmul <2 x float> %x, %y
  ret <2 x float> %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between -2.0 (compare true) and -1.0.
; The expected output on every target keeps the v_mul_f32 and picks the
; multiplier with a v_cndmask_b32 that uses both negative values as inline
; constants.
define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f32_test5:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-NEXT:    v_cndmask_b32_e64 v1, -1.0, -2.0, vcc
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f32_test5:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v1, -1.0, -2.0, vcc
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f32_test5:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f32_test5:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float -2.000000e+00, float -1.000000e+00
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between -3.0 (compare true, bit pattern
; 0xc0400000) and 8.0 (0x41000000). The expected output keeps the v_mul_f32.
; GFX7/GFX9 materialize both literals with v_mov_b32 before the v_cndmask_b32;
; GFX10/GFX11 materialize one and use the other as a literal operand of the
; v_cndmask_b32. SDAG and GlobalISel differ in which literal goes where.
define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f32_test6:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    v_mov_b32_e32 v3, 0x41000000
; GFX7-SDAG-NEXT:    v_mov_b32_e32 v4, 0xc0400000
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX7-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f32_test6:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc0400000
; GFX7-GISEL-NEXT:    v_mov_b32_e32 v4, 0x41000000
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f32_test6:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0x41000000
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, 0xc0400000
; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f32_test6:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc0400000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0x41000000
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
; GFX9-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_f32_test6:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc0400000
; GFX10-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
; GFX10-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_f32_test6:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41000000
; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo
; GFX10-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: fmul_select_f32_test6:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc0400000
; GFX11-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
; GFX11-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: fmul_select_f32_test6:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41000000
; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo
; GFX11-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float -3.000000e+00, float 8.000000e+00
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between 2^59 (compare true) and 2^92.
; The expected output replaces the multiply with v_ldexp_f32 and selects the
; integer exponent instead: 59 when the compare is true, 0x5c (92) otherwise.
define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, 0x5c
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-NEXT:    v_cndmask_b32_e64 v1, v3, 59, vcc
; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, 0x5c
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, 59, vcc
; GFX9-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo
; GFX10-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float 0x43A0000000000000, float 0x45B0000000000000
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between 16.0 (compare true, bit pattern
; 0x41800000) and -8.0 (0xc1000000), i.e. constants of opposite sign. The
; expected output keeps the v_mul_f32. GFX7/GFX9 materialize both literals with
; v_mov_b32; GFX10/GFX11 materialize one and use the other as a literal operand
; of the v_cndmask_b32.
define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f32_test8:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc1000000
; GFX7-SDAG-NEXT:    v_mov_b32_e32 v4, 0x41800000
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX7-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f32_test8:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41800000
; GFX7-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc1000000
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f32_test8:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc1000000
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, 0x41800000
; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f32_test8:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41800000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc1000000
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
; GFX9-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_f32_test8:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_mov_b32_e32 v3, 0x41800000
; GFX10-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
; GFX10-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_f32_test8:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc1000000
; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo
; GFX10-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: fmul_select_f32_test8:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 0x41800000
; GFX11-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
; GFX11-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: fmul_select_f32_test8:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc1000000
; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo
; GFX11-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float 1.600000e+01, float -8.000000e+00
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between 0.0 (compare true) and 2.0.
; The expected output on every target keeps the v_mul_f32 and picks the
; multiplier with a v_cndmask_b32 using the inline constants 2.0 and 0.
define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f32_test9:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 2.0, 0, vcc
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f32_test9:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 2.0, 0, vcc
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f32_test9:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f32_test9:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float 0.000000e+00, float 2.000000e+00
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between -0.0 (compare true) and 0.0.
; The expected output keeps the v_mul_f32. GFX7/GFX9 build the -0.0 operand
; with v_bfrev_b32 of 1 before the v_cndmask_b32; GFX10/GFX11 pass the literal
; 0x80000000 directly to the v_cndmask_b32.
define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f32_test10:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_bfrev_b32_e32 v3, 1
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f32_test10:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfrev_b32_e32 v3, 1
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f32_test10:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f32_test10:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float -0.000000e+00, float 0.000000e+00
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between -(2^78) (compare true) and
; -(2^56). The expected output replaces the multiply with v_ldexp_f32 on the
; negated input (-v0) and selects the integer exponent instead: 0x4e (78) when
; the compare is true, 56 otherwise.
define float @fmul_select_f32_test11_sel_log2val_pos78_pos56(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, 0x4e
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-NEXT:    v_cndmask_b32_e32 v1, 56, v3, vcc
; GFX7-NEXT:    v_ldexp_f32_e64 v0, -v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, 0x4e
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v1, 56, v3, vcc
; GFX9-NEXT:    v_ldexp_f32 v0, -v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo
; GFX10-NEXT:    v_ldexp_f32 v0, -v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_ldexp_f32 v0, -v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float 0xC4D0000000000000, float 0xC370000000000000
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f32 fmul whose multiplier is a select between 2^-48 (compare true) and 2^68.
; The expected output replaces the multiply with v_ldexp_f32 and selects the
; integer exponent instead: -48 when the compare is true, 0x44 (68) otherwise.
; The -48 operand appears either as v_not_b32 of 47 or as the literal
; 0xffffffd0, depending on target and selector.
define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
; GFX7-SDAG:       ; %bb.0:
; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT:    v_mov_b32_e32 v3, 0x44
; GFX7-SDAG-NEXT:    v_not_b32_e32 v4, 47
; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX7-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
; GFX7-GISEL:       ; %bb.0:
; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT:    v_not_b32_e32 v3, 47
; GFX7-GISEL-NEXT:    v_mov_b32_e32 v4, 0x44
; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
; GFX7-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0x44
; GFX9-SDAG-NEXT:    v_not_b32_e32 v4, 47
; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_not_b32_e32 v3, 47
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0x44
; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
; GFX9-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_not_b32_e32 v3, 47
; GFX10-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo
; GFX10-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v3, 0x44
; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo
; GFX10-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_not_b32_e32 v3, 47
; GFX11-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo
; GFX11-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0x44
; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo
; GFX11-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, float 0x3CF0000000000000, float 0x4430000000000000
  %ldexp = fmul float %x, %y
  ret float %ldexp
}
|
|
|
|
; f64 fmul whose multiplier is a select between 2.0 (compare true) and 1.0.
; The expected output on every target replaces the multiply with v_ldexp_f64
; and selects the integer exponent instead: 1 when the compare is true, 0
; otherwise.
define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f64_test1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f64_test1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX10-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double 2.000000e+00, double 1.000000e+00
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; Scalar f64 multiply by select(cond, 0.5, 1.0).
; The assertions expect v_ldexp_f64 with a selected exponent of -1 (cond true)
; or 0 (cond false) in place of a floating-point multiply.
define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f64_test2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f64_test2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double 5.000000e-01, double 1.000000e+00
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; <2 x double> multiply by a per-lane select between splat 2.0 and splat 1.0.
; The assertions expect each lane to be handled independently: one v_cndmask
; picking exponent 1 or 0 and one v_ldexp_f64 per element, with no multiply.
define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
; GFX7-LABEL: fmul_select_v2f64_test3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX7-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_v2f64_test3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_v2f64_test3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_v2f64_test3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
  %y = select <2 x i1> %bool, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
  %ldexp = fmul <2 x double> %x, %y
  ret <2 x double> %ldexp
}
|
|
|
|
; <2 x double> multiply by a per-lane select between splat 0.5 and splat 1.0.
; The assertions expect, for each element, a v_cndmask picking exponent -1 or 0
; feeding a v_ldexp_f64, with no multiply.
define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
; GFX7-LABEL: fmul_select_v2f64_test4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; GFX7-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_v2f64_test4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_v2f64_test4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo
; GFX10-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_v2f64_test4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo
; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
  %y = select <2 x i1> %bool, <2 x double> <double 5.000000e-01, double 5.000000e-01>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
  %ldexp = fmul <2 x double> %x, %y
  ret <2 x double> %ldexp
}
|
|
|
|
; Scalar f64 multiply by select(cond, -0.5, -1.0).
; Both constants are negative powers of two. The assertions expect v_ldexp_f64
; with a negated source operand (-v[0:1]) and a selected exponent of -1 or 0.
define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f64_test5:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GFX7-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f64_test5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double -5.000000e-01, double -1.000000e+00
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; Scalar f64 multiply by select(cond, -2.0, -1.0).
; The assertions expect v_ldexp_f64 with a negated source operand (-v[0:1])
; and a selected exponent of 1 or 0.
define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f64_test6:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f64_test6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double -2.000000e+00, double -1.000000e+00
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; Scalar f64 multiply by select(cond, 2.0, -1.0) -- the two constants have
; opposite signs. The assertions show the multiply is kept as v_mul_f64: only
; the high dword of the constant is selected (0xbff00000 vs. the inline 2.0)
; and the low dword is materialized as 0. No v_ldexp_f64 is expected here.
define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f64_test7:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f64_test7:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, 0xbff00000
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 2.0, vcc
; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f64_test7:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f64_test7:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xbff00000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 2.0, vcc
; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double 2.000000e+00, double -1.000000e+00
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; Scalar f64 multiply by select(cond, -4.0, -32.0).
; The assertions expect v_ldexp_f64 with a negated source operand (-v[0:1])
; and a selected exponent of 2 (cond true) or 5 (cond false).
define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f64_test8:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc
; GFX7-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f64_test8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc
; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo
; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double -4.000000e+00, double -3.200000e+01
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; <2 x double> multiply by a per-lane select between splat -2.0 and splat -1.0.
; The assertions expect, for each element, a v_cndmask picking exponent 1 or 0
; feeding a v_ldexp_f64 whose source operand is negated.
define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
; GFX7-LABEL: fmul_select_v2f64_test9:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX7-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4
; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX7-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_v2f64_test9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_v2f64_test9:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_v2f64_test9:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX11-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
  %y = select <2 x i1> %bool, <2 x double> <double -2.000000e+00, double -2.000000e+00>, <2 x double> <double -1.000000e+00, double -1.000000e+00>
  %ldexp = fmul <2 x double> %x, %y
  ret <2 x double> %ldexp
}
|
|
|
|
; <2 x double> multiply by select(cond, <0.5, 2.0>, <-1.0, 1.0>): the two lanes
; behave differently in the assertions.
;   lane 0 (0.5 vs. -1.0, opposite signs): stays a v_mul_f64 against a constant
;           whose high dword is selected (0x3fe00000 / 0xbff00000), low dword 0.
;   lane 1 (2.0 vs. 1.0): becomes v_ldexp_f64 with a selected exponent of 1 or 0.
define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_v2f64_test10:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_mov_b32_e32 v8, 0xbff00000
; GFX7-SDAG-NEXT: v_mov_b32_e32 v9, 0x3fe00000
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX7-SDAG-NEXT: v_mov_b32_e32 v8, 0
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX7-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_v2f64_test10:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_mov_b32_e32 v9, 0x3fe00000
; GFX7-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX7-GISEL-NEXT: v_mov_b32_e32 v8, 0
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX7-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_v2f64_test10:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_mov_b32_e32 v8, 0xbff00000
; GFX9-SDAG-NEXT: v_mov_b32_e32 v9, 0x3fe00000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX9-SDAG-NEXT: v_mov_b32_e32 v8, 0
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_v2f64_test10:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_mov_b32_e32 v9, 0x3fe00000
; GFX9-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; GFX9-GISEL-NEXT: v_mov_b32_e32 v8, 0
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX9-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_v2f64_test10:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_mov_b32_e32 v8, 0x3fe00000
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v9, 0xbff00000, v8, vcc_lo
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX10-SDAG-NEXT: v_mov_b32_e32 v8, 0
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_v2f64_test10:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mov_b32_e32 v9, 0xbff00000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX10-GISEL-NEXT: v_mov_b32_e32 v8, 0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX10-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: fmul_select_v2f64_test10:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_mov_b32_e32 v8, 0x3fe00000
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v9, 0xbff00000, v8 :: v_dual_mov_b32 v8, 0
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: fmul_select_v2f64_test10:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v9, 0xbff00000 :: v_dual_mov_b32 v8, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
  %y = select <2 x i1> %bool, <2 x double> <double 5.000000e-01, double 2.000000e+00>, <2 x double> <double -1.000000e+00, double 1.000000e+00>
  %ldexp = fmul <2 x double> %x, %y
  ret <2 x double> %ldexp
}
|
|
|
|
; Scalar f64 multiply by select(cond, -2.0, -0.0) -- one arm is a zero.
; The assertions show the multiply is kept as v_mul_f64: the high dword of the
; constant is selected between 0x80000000 (emitted as v_bfrev_b32 of 1 on the
; older targets) and the inline -2.0, with the low dword materialized as 0.
define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f64_test11:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v4, 1
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f64_test11:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -2.0, vcc
; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f64_test11:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v4, 1
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f64_test11:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -2.0, vcc
; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test11:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test11:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double -2.000000e+00, double -0.000000e+00
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; Scalar f64 multiply by select(cond, 0.0, -0.0) -- both arms are zeros.
; The assertions show the multiply is kept as v_mul_f64: the high dword of the
; constant is selected between 0x80000000 and 0, and the low dword is 0.
define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f64_test12:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v4, 1
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc
; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f64_test12:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f64_test12:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v4, 1
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f64_test12:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test12:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test12:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double 0.000000e+00, double -0.000000e+00
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; Scalar f64 multiply by select(cond, 0.0, 16.0) -- one arm is a zero.
; The assertions show the multiply is kept as v_mul_f64: the high dword of the
; constant is selected between 0x40300000 (16.0) and 0, and the low dword is 0.
define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f64_test13:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0x40300000
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc
; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f64_test13:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, 0x40300000
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f64_test13:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x40300000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f64_test13:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40300000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test13:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fmul_select_f64_test13:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double 0.000000e+00, double 1.600000e+01
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; f64 multiply by a select of two power-of-two constants with mixed-sign
; exponents: 0x45B0000000000000 is 2^92 and 0x3E40000000000000 is 2^-27.
; Every checked configuration is expected to replace the fmul with
; v_ldexp_f64 whose exponent operand is a v_cndmask between 92 (0x5c) and
; -27 (materialized as v_not_b32 of 26, or as the literal 0xffffffe5).
; The result is still named %ldexp to reflect the expected lowering.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_not_b32_e32 v4, 26
; GFX7-SDAG-NEXT: v_mov_b32_e32 v5, 0x5c
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
; GFX7-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x5c
; GFX7-GISEL-NEXT: v_not_b32_e32 v5, 26
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
; GFX7-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_not_b32_e32 v4, 26
; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x5c
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5c
; GFX9-GISEL-NEXT: v_not_b32_e32 v5, 26
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x5c
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo
; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_not_b32_e32 v4, 26
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo
; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x5c
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo
; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_not_b32_e32 v4, 26
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo
; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double 0x45B0000000000000, double 0x3E40000000000000
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
; f64 multiply by a select of two power-of-two constants that both have
; negative exponents: 0x3D50000000000000 is 2^-42 and 0x3DE0000000000000 is
; 2^-33. Every checked configuration is expected to replace the fmul with
; v_ldexp_f64 whose exponent operand is a v_cndmask between -42 (v_not_b32
; of 41, or the literal 0xffffffd6) and -33 (v_not_b32 of 32, or the
; literal 0xffffffdf).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_not_b32_e32 v4, 32
; GFX7-SDAG-NEXT: v_not_b32_e32 v5, 41
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
; GFX7-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_not_b32_e32 v4, 41
; GFX7-GISEL-NEXT: v_not_b32_e32 v5, 32
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
; GFX7-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_not_b32_e32 v4, 32
; GFX9-SDAG-NEXT: v_not_b32_e32 v5, 41
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_not_b32_e32 v4, 41
; GFX9-GISEL-NEXT: v_not_b32_e32 v5, 32
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_not_b32_e32 v4, 41
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo
; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_not_b32_e32 v4, 32
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo
; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_not_b32_e32 v4, 41
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo
; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_not_b32_e32 v4, 32
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo
; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, double 0x3D50000000000000, double 0x3DE0000000000000
  %ldexp = fmul double %x, %y
  ret double %ldexp
}
|
|
|
|
|
|
; f16 multiply by a select between 2.0 and 1.0. The checks expect the
; multiply to become an ldexp whose exponent is a v_cndmask between 1 and 0.
; Per the checks, GFX7 converts the value with v_cvt_*_f16/f32 and uses
; v_ldexp_f32, while GFX9 and newer use v_ldexp_f16 directly. The GlobalISel
; runs on GFX9 and newer additionally clamp the exponent with v_med3_i32
; between 0xffff8000 and 0x7fff before the ldexp; the SelectionDAG runs do
; not for this pair of constants.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f16_test1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f16_test1:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f16_test1:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f16_test1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_f16_test1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_f16_test1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test1:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test1:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test1:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test1:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, half 2.000000e+00, half 1.000000e+00
  %ldexp = fmul half %x, %y
  ret half %ldexp
}
|
|
|
|
; f16 multiply by a select between 0.5 and 1.0. The checks expect the
; multiply to become an ldexp whose exponent is a v_cndmask between -1 and 0.
; Per the checks, GFX7 converts the value with v_cvt_*_f16/f32 and uses
; v_ldexp_f32, while GFX9 and newer use v_ldexp_f16. With the negative
; exponent, both SelectionDAG and GlobalISel on GFX9 and newer clamp the
; exponent with v_med3_i32 before the ldexp (SelectionDAG loads the lower
; bound with s_movk_i32 0x8000, GlobalISel uses the literal 0xffff8000).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_f16_test2:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_f16_test2:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_f16_test2:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_f16_test2:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_f16_test2:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_f16_test2:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test2:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test2:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test2:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test2:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq i32 %bool.arg1, %bool.arg2
  %y = select i1 %bool, half 5.000000e-01, half 1.000000e+00
  %ldexp = fmul half %x, %y
  ret half %ldexp
}
|
|
|
|
; <2 x half> multiply by a per-lane select between splat 2.0 and splat 1.0.
; The two instruction selectors are expected to differ here, per the checks:
;  - SelectionDAG keeps a real multiply. GFX7 selects 1.0/2.0 per lane and
;    uses v_mul_f32; GFX9 and newer select the half bit patterns
;    0x3c00 (1.0) / 0x4000 (2.0), pack them with v_pack_b32_f16 and use
;    v_pk_mul_f16.
;  - GlobalISel scalarizes into one ldexp per lane with a 0/1 exponent from
;    v_cndmask (v_ldexp_f32 on GFX7, v_ldexp_f16 on GFX9 and newer, where
;    each exponent is also clamped with v_med3_i32).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
; GFX7-SDAG-LABEL: fmul_select_v2f16_test3:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-GISEL-LABEL: fmul_select_v2f16_test3:
; GFX7-GISEL: ; %bb.0:
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: fmul_select_v2f16_test3:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3c00
; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x4000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: fmul_select_v2f16_test3:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4
; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: fmul_select_v2f16_test3:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
; GFX10-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: fmul_select_v2f16_test3:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
; GFX10-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-TRUE16-LABEL: fmul_select_v2f16_test3:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3c00
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, v1, v3
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, 0x4000, vcc_lo
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.h, v2.l, 0x4000, s0
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v1.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: fmul_select_v2f16_test3:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v5, 0x4000
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v2
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-TRUE16-LABEL: fmul_select_v2f16_test3:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v2.l
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: fmul_select_v2f16_test3:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v4, v2
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
  %y = select <2 x i1> %bool, <2 x half> <half 2.000000e+00, half 2.000000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
  %ldexp = fmul <2 x half> %x, %y
  ret <2 x half> %ldexp
}
|
|
|
|
define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
|
|
; GFX7-SDAG-LABEL: fmul_select_v2f16_test4:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: fmul_select_v2f16_test4:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
|
|
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
|
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-SDAG-LABEL: fmul_select_v2f16_test4:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3c00
|
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x3800
|
|
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
|
|
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
|
|
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
|
|
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
|
|
; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
|
|
; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
|
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: fmul_select_v2f16_test4:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
|
|
; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4
|
|
; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4
|
|
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
|
|
; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: fmul_select_v2f16_test4:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x3800
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
|
|
; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: fmul_select_v2f16_test4:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
|
|
; GFX10-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
|
|
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
|
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_v2f16_test4:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3c00
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, v1, v3
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, 0x3800, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.h, v2.l, 0x3800, s0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v1.h, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v1
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_v2f16_test4:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v5, 0x3800
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_v2f16_test4:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v3, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v2.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_v2f16_test4:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v3, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
|
|
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v4, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
|
|
%y = select <2 x i1> %bool, <2 x half> <half 5.000000e-01, half 5.000000e-01>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
|
|
%ldexp = fmul <2 x half> %x, %y
|
|
ret <2 x half> %ldexp
|
|
}
|
|
|
|
; fmul of %x by a select between two positive power-of-two constants
; (2.0 when the compare is true, 8.0 otherwise). In every checked
; configuration below the expected code selects an integer exponent (1 or 3)
; with v_cndmask and applies it with v_ldexp; no multiply is emitted.
define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-SDAG-LABEL: fmul_select_f16_test5:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: fmul_select_f16_test5:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc
|
|
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_f16_test5:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc
|
|
; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_f16_test5:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
|
|
; GFX10-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test5:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test5:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test5:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test5:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, half 2.000000e+00, half 8.000000e+00
|
|
%ldexp = fmul half %x, %y
|
|
ret half %ldexp
|
|
}
|
|
|
|
; fmul of %x by a select between -8.0 (compare true) and 3.0 (compare false).
; 3.0 is not a power of two, and in every checked configuration below the
; expected code selects between the two constants and multiplies with v_mul;
; no v_ldexp is emitted.
define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-SDAG-LABEL: fmul_select_f16_test6:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x40400000
|
|
; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc1000000
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: fmul_select_f16_test6:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0xc800
|
|
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x4200
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-SDAG-LABEL: fmul_select_f16_test6:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4200
|
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc800
|
|
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: fmul_select_f16_test6:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc800
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4200
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
|
|
; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: fmul_select_f16_test6:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xc800
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: fmul_select_f16_test6:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4200
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test6:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4200
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, 0xc800, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test6:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc800
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test6:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x4200
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, 0xc800, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test6:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4200
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, half -8.000000e+00, half 3.000000e+00
|
|
%ldexp = fmul half %x, %y
|
|
ret half %ldexp
|
|
}
|
|
|
|
; fmul of %x by a select between 8.0 (compare true) and -4.0 (compare false).
; Both magnitudes are powers of two, but the constants differ in sign. In
; every checked configuration below the expected code still selects between
; the two constants and multiplies with v_mul; no v_ldexp is emitted.
; NOTE(review): presumably the sign mismatch is what blocks the ldexp form
; here - confirm against the combine that implements the fold.
define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-SDAG-LABEL: fmul_select_f16_test7:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: fmul_select_f16_test7:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x4800
|
|
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0xc400
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-SDAG-LABEL: fmul_select_f16_test7:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc400
|
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4800
|
|
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: fmul_select_f16_test7:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4800
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc400
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
|
|
; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: fmul_select_f16_test7:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4800
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: fmul_select_f16_test7:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc400
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test7:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xc400
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, 0x4800, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test7:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4800
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test7:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0xc400
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, 0x4800, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test7:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc400
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, half 8.000000e+00, half -4.000000e+00
|
|
%ldexp = fmul half %x, %y
|
|
ret half %ldexp
|
|
}
|
|
|
|
; fmul of %x by a select between -0.0 (compare true) and 0.0 (compare false).
; In every checked configuration below the expected code selects the signed
; zero (sign-bit constant versus 0) and multiplies with v_mul; no v_ldexp is
; emitted.
define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-SDAG-LABEL: fmul_select_f16_test8:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v3, 1
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: fmul_select_f16_test8:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x8000
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_f16_test8:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_mov_b32_e32 v3, 0x8000
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
|
|
; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_f16_test8:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
|
|
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test8:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, 0, 0x8000, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test8:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test8:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, 0, 0x8000, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test8:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, half -0.000000e+00, half 0.000000e+00
|
|
%ldexp = fmul half %x, %y
|
|
ret half %ldexp
|
|
}
|
|
|
|
; fmul of %x by a select between two negative power-of-two constants
; (-16.0 when the compare is true, -32.0 otherwise). In every checked
; configuration below the expected code uses v_ldexp with an exponent of 4
; or 5, and the negation shows up as a -v0 source modifier.
; The SelectionDAG output selects the exponent directly (v_cndmask 5, 4).
; The GlobalISel output computes it as 5 plus a 0/-1 select, and for the
; gfx900, gfx1030 and gfx1100 runs clamps it with v_med3_i32 before the ldexp.
define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-SDAG-LABEL: fmul_select_f16_test9:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: fmul_select_f16_test9:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
|
|
; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 5, v1
|
|
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-SDAG-LABEL: fmul_select_f16_test9:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc
|
|
; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v0, v1
|
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: fmul_select_f16_test9:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
|
|
; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 5, v1
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
|
|
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
|
|
; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
|
|
; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: fmul_select_f16_test9:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v0, v1
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: fmul_select_f16_test9:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1
|
|
; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
|
|
; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test9:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e64 v0.l, -v0.l, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test9:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e64 v0, -v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test9:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u32_e32 v1, 5, v1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e64 v0.l, -v0.l, v1.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test9:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u32_e32 v1, 5, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e64 v0, -v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, half -1.600000e+01, half -3.200000e+01
|
|
%ldexp = fmul half %x, %y
|
|
ret half %ldexp
|
|
}
|
|
|
|
; fmul of %x by a select between the half constants 0xH1000 (compare true)
; and 0xH6800 (compare false), i.e. 2^-11 and 2^11 as the function name says.
; In every checked configuration below the expected code selects the integer
; exponent (v_cndmask 11, -11) and applies it with v_ldexp.
; The SelectionDAG output for the gfx900, gfx1030 and gfx1100 runs also
; clamps the exponent with v_med3_i32 before the ldexp; the GlobalISel output
; and both gfx700 outputs do not.
define half @fmul_select_f16_test10_sel_log2val_neg11_pos11(half %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
|
|
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
|
|
; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
|
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff
|
|
; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
|
|
; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
|
|
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
|
|
; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, half 0xH1000, half 0xH6800
|
|
%ldexp = fmul half %x, %y
|
|
ret half %ldexp
|
|
}
|
|
|
|
; Multiplies %x by a power-of-two constant picked by a select on an i32
; equality compare. The two constants are 2^7 (0xH5800) and 2^-14 (0xH0400).
; The expectations below show the multiply emitted as an ldexp whose integer
; exponent operand comes from a v_cndmask between the immediates -14 and 7.
; The gfx700 runs use v_ldexp_f32 together with v_cvt conversions between f16
; and f32; all other runs use v_ldexp_f16. The SelectionDAG runs on gfx900 and
; newer additionally pass the exponent through v_med3_i32 with 0x8000 and
; 0x7fff, while the GlobalISel runs do not.
define half @fmul_select_f16_test11_sel_log2val_pos7_neg14(half %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX7-SDAG: ; %bb.0:
|
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
|
|
; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX7-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX7-GISEL: ; %bb.0:
|
|
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
|
|
; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
|
; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
|
|
; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
|
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff
|
|
; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
|
|
; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
|
|
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX10-SDAG: ; %bb.0:
|
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000
|
|
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
|
|
; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
|
|
; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX10-GISEL: ; %bb.0:
|
|
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
|
|
; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
; Test input. The select yields 0xH5800 (128.0, i.e. 2^7) when the two i32
; arguments are equal and 0xH0400 (2^-14) otherwise.
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, half 0xH5800, half 0xH0400
|
|
%ldexp = fmul half %x, %y
|
|
ret half %ldexp
|
|
}
|
|
|
|
; bfloat variant: multiplies %x by 2.0 when %bool.arg1 equals %bool.arg2 and
; by 1.0 otherwise.
; None of the expectations below contain an ldexp. Every run selects between
; the two constants with a v_cndmask and then performs v_mul_f32. On gfx900
; and newer the constants appear as the 16-bit patterns 0x4000 (2.0) and
; 0x3f80 (1.0); the gfx700 run selects between the immediates 1.0 and 2.0.
; The instructions that follow the multiply on gfx900 and newer (v_bfe_u32,
; v_add3_u32 with 0x7fff, v_cmp_u_f32, v_cndmask) presumably round the f32
; product back to bfloat -- NOTE(review): confirm against the bf16 lowering.
; The SelectionDAG and GlobalISel expectations are identical for this function.
define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test1:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test1:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3f80
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test1:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0x4000
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0x3f80
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test1:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3f80
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test1:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test1:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3f80
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test1:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
; Test input. The product is named %ldexp even though the expectations above
; contain no ldexp instruction for this bfloat case.
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat 2.000000e+00, bfloat 1.000000e+00
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|
|
; bfloat variant with a fractional factor: multiplies %x by 0.5 when
; %bool.arg1 equals %bool.arg2 and by 1.0 otherwise.
; None of the expectations below contain an ldexp. Every run selects between
; the two constants with a v_cndmask and then performs v_mul_f32. On gfx900
; and newer the constants appear as the 16-bit patterns 0x3f00 (0.5) and
; 0x3f80 (1.0); the gfx700 run selects between the immediates 1.0 and 0.5.
; The instructions that follow the multiply on gfx900 and newer (v_bfe_u32,
; v_add3_u32 with 0x7fff, v_cmp_u_f32, v_cndmask) presumably round the f32
; product back to bfloat -- NOTE(review): confirm against the bf16 lowering.
; The SelectionDAG and GlobalISel expectations are identical for this function.
define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test2:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test2:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3f80
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0x3f00
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test2:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0x3f00
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0x3f80
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test2:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3f80
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x3f00, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test2:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test2:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3f80
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x3f00, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test2:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
; Test input. The product is named %ldexp even though the expectations above
; contain no ldexp instruction for this bfloat case.
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat 5.000000e-01, bfloat 1.000000e+00
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|
|
; Two-element bfloat vector variant: each lane of %x is multiplied by 2.0 when
; the matching lanes of %bool.arg1 and %bool.arg2 are equal, and by 1.0
; otherwise (both select operands are splat constants).
; None of the expectations below contain an ldexp. Every run performs one
; compare, one v_cndmask and one v_mul_f32 per lane. On gfx900 and newer the
; constants appear as the 16-bit patterns 0x4000 (2.0) and 0x3f80 (1.0), and
; the two lane results are combined into v0 with v_perm_b32 (selector
; 0x7060302) or, in the gfx1100 true16 runs, with v_bfi_b32 (mask 0xffff).
; The gfx700 run leaves the two products in v0 and v1, each masked with
; 0xffff0000.
; The SelectionDAG and GlobalISel expectations are identical for this function.
define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_v2bf16_test3:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_v2bf16_test3:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3f80
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v3, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: s_mov_b64 vcc, s[4:5]
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v1, v2, v1
|
|
; GFX9-NEXT: v_bfe_u32 v2, v1, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX9-NEXT: v_add3_u32 v2, v2, v1, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v3
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
|
|
; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1
|
|
; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
|
|
; GFX9-NEXT: s_mov_b32 s4, 0x7060302
|
|
; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_v2bf16_test3:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0x4000
|
|
; GFX10-NEXT: v_mov_b32_e32 v4, 0x3f80
|
|
; GFX10-NEXT: v_cmp_eq_u32_e64 s4, v1, v3
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v0
|
|
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v3, v4, v2, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: s_mov_b32 vcc_lo, s4
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v2, v4, v2, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v3
|
|
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v2
|
|
; GFX10-NEXT: v_bfe_u32 v3, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
|
; GFX10-NEXT: v_bfe_u32 v2, v1, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v4, 0x400000, v1
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
|
|
; GFX10-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
|
|
; GFX10-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
|
|
; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_v2bf16_test3:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3f80
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, v1, v3
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v3.h, v2.l, 0x4000, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, 0x4000, s0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v3
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.h, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v3
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v0, v1
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_v2bf16_test3:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v5, 0x4000
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v1, v3, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v3, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v2, v1, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_v2bf16_test3:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3f80
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, v1, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v3.h, v2.l, 0x4000, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, 0x4000, s0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.h, v1.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v0, v1
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_v2bf16_test3:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v5, 0x4000
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v1, v3, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v3, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v2, v1, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
; Test input. The compare, select and multiply are all element-wise on
; two-element vectors. The product is named %ldexp even though the
; expectations above contain no ldexp instruction.
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
|
|
%y = select <2 x i1> %bool, <2 x bfloat> <bfloat 2.000000e+00, bfloat 2.000000e+00>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00>
|
|
%ldexp = fmul <2 x bfloat> %x, %y
|
|
ret <2 x bfloat> %ldexp
|
|
}
|
|
|
|
; fmul_select_v2bf16_test4 - <2 x bfloat> multiply by a per-lane selected constant.
; Each lane of %x is multiplied by 0.5 where the matching lanes of %bool.arg1
; and %bool.arg2 compare equal, and by 1.0 otherwise.
; In the assertions below, 0x3f00 and 0x3f80 are the bf16 bit patterns of 0.5
; and 1.0 (the GFX7 run selects between the inline operands 0.5 and 1.0).
; Every run line performs the multiply with v_mul_f32 (or v_dual_mul_f32); no
; ldexp instruction appears, even though the IR result is named %ldexp.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_v2bf16_test4:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_v2bf16_test4:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3f80
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0x3f00
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v3, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: s_mov_b64 vcc, s[4:5]
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v1, v2, v1
|
|
; GFX9-NEXT: v_bfe_u32 v2, v1, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX9-NEXT: v_add3_u32 v2, v2, v1, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v3
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
|
|
; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1
|
|
; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
|
|
; GFX9-NEXT: s_mov_b32 s4, 0x7060302
|
|
; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_v2bf16_test4:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0x3f00
|
|
; GFX10-NEXT: v_mov_b32_e32 v4, 0x3f80
|
|
; GFX10-NEXT: v_cmp_eq_u32_e64 s4, v1, v3
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v0
|
|
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v3, v4, v2, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: s_mov_b32 vcc_lo, s4
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v2, v4, v2, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v3
|
|
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v2
|
|
; GFX10-NEXT: v_bfe_u32 v3, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
|
; GFX10-NEXT: v_bfe_u32 v2, v1, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v4, 0x400000, v1
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
|
|
; GFX10-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
|
|
; GFX10-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
|
|
; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_v2bf16_test4:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3f80
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, v1, v3
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v3.h, v2.l, 0x3f00, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, 0x3f00, s0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v3
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.h, v1.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v3
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v0, v1
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_v2bf16_test4:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v5, 0x3f00
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v1, v3, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v3, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v2, v1, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_v2bf16_test4:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3f80
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, v1, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v3.h, v2.l, 0x3f00, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, 0x3f00, s0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.h, v1.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v3
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v0, v1
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_v2bf16_test4:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v5, 0x3f00
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v1, v3, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v3, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v2, v1, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
|
|
%y = select <2 x i1> %bool, <2 x bfloat> <bfloat 5.000000e-01, bfloat 5.000000e-01>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00>
|
|
%ldexp = fmul <2 x bfloat> %x, %y
|
|
ret <2 x bfloat> %ldexp
|
|
}
|
|
|
|
; fmul_select_bf16_test5 - scalar bfloat multiply by a selected constant.
; %x is multiplied by 2.0 when %bool.arg1 equals %bool.arg2 and by 8.0
; otherwise.
; In the assertions below, 0x4000 and 0x4100 are the bf16 bit patterns of 2.0
; and 8.0; the GFX7 run uses the inline operand 2.0 and the f32 pattern
; 0x41000000 (8.0).
; Every run line performs the multiply with v_mul_f32; no ldexp instruction
; appears, even though the IR result is named %ldexp.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test5:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test5:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0x4100
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test5:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0x4000
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0x4100
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test5:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4100
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test5:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test5:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4100
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test5:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat 2.000000e+00, bfloat 8.000000e+00
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|
|
; fmul_select_bf16_test6 - scalar bfloat multiply by a selected constant,
; with one negative choice. %x is multiplied by -8.0 when %bool.arg1 equals
; %bool.arg2 and by 3.0 otherwise.
; In the assertions below, 0xc100 and 0x4040 are the bf16 bit patterns of -8.0
; and 3.0. The 32-bit moves show 0xc100 sign-extended as 0xffffc100, and the
; GFX7 run uses the f32 patterns 0xc1000000 (-8.0) and 0x40400000 (3.0).
; Every run line performs the multiply with v_mul_f32; no ldexp instruction
; appears, even though the IR result is named %ldexp.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test6:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000
|
|
; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test6:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0x4040
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffc100
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test6:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0xffffc100
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0x4040
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test6:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4040
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc100, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test6:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test6:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4040
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc100, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test6:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat -8.000000e+00, bfloat 3.000000e+00
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|
|
define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test7:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test7:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffc080
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0x4100
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test7:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0x4100
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffffc080
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test7:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xc080
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4100, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test7:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test7:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xc080
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4100, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test7:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat 8.000000e+00, bfloat -4.000000e+00
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|
|
; fmul_select_bf16_test8 multiplies %x by a select between two bfloat zeros:
; -0.0 when %bool.arg1 == %bool.arg2, +0.0 otherwise.
; In the autogenerated expectations below, every run configuration still
; contains a v_mul_f32 and none contains a v_ldexp, even though the IR result
; is named %ldexp.
define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test8:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_bfrev_b32_e32 v3, 1
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test8:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff8000
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test8:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffff8000, vcc_lo
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test8:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, 0, 0x8000, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v3, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test8:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffff8000, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test8:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, 0, 0x8000, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v3, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test8:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffff8000, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat -0.000000e+00, bfloat 0.000000e+00
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|
|
; fmul_select_bf16_test9 multiplies %x by a select between two negative
; power-of-two bfloat constants: -16.0 when %bool.arg1 == %bool.arg2, -32.0
; otherwise.
; In the autogenerated expectations below, every run configuration still
; contains a v_mul_f32 and none contains a v_ldexp, even though the IR result
; is named %ldexp.
define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test9:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000
|
|
; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test9:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffc200
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffc180
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test9:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0xffffc180
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffffc200
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test9:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xc200
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc180, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test9:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test9:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xc200
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc180, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test9:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat -1.600000e+01, bfloat -3.200000e+01
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|
|
; fmul_select_bf16_test10 multiplies %x by a select between two bfloat
; constants given as raw bit patterns: 0xRE000 when %bool.arg1 == %bool.arg2,
; 0xRDB80 otherwise.
; Decoded as bfloat (1 sign, 8 exponent, 7 mantissa bits) these are -(2^65)
; and -(2^56). The pos65/pos56 in the function name matches the exponents;
; both constants have the sign bit set.
; In the autogenerated expectations below, every run configuration still
; contains a v_mul_f32 and none contains a v_ldexp, even though the IR result
; is named %ldexp.
define bfloat @fmul_select_bf16_test10_sel_log2val_pos65_pos56(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000
|
|
; GFX7-NEXT: v_bfrev_b32_e32 v4, 7
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffdb80
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffe000
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0xffffe000
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffffdb80
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xdb80
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe000, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xdb80
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe000, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat 0xRE000, bfloat 0xRDB80
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|
|
; fmul_select_bf16_test11 multiplies %x by a select between two bfloat
; constants given as raw bit patterns: 0xR3480 when %bool.arg1 == %bool.arg2,
; 0xR4C00 otherwise.
; Decoded as bfloat (1 sign, 8 exponent, 7 mantissa bits) these are 2^-22 and
; 2^25, matching neg22/pos25 in the function name.
; In the autogenerated expectations below, every run configuration still
; contains a v_mul_f32 and none contains a v_ldexp, even though the IR result
; is named %ldexp.
define bfloat @fmul_select_bf16_test11_sel_log2val_neg22_pos25(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
|
|
; GFX7-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; GFX7-NEXT: v_bfrev_b32_e32 v3, 50
|
|
; GFX7-NEXT: v_mov_b32_e32 v4, 0x34800000
|
|
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
|
; GFX9-NEXT: v_mov_b32_e32 v1, 0x4c00
|
|
; GFX9-NEXT: v_mov_b32_e32 v2, 0x3480
|
|
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
|
|
; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
|
|
; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, 0x3480
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, 0x4c00
|
|
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
|
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX10-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-TRUE16-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
|
|
; GFX11-SDAG-TRUE16: ; %bb.0:
|
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4c00
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x3480, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-SDAG-FAKE16-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
|
|
; GFX11-SDAG-FAKE16: ; %bb.0:
|
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-TRUE16-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
|
|
; GFX11-GISEL-TRUE16: ; %bb.0:
|
|
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4c00
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x3480, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mul_f32_e32 v0, v4, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
|
|
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-GISEL-FAKE16-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
|
|
; GFX11-GISEL-FAKE16: ; %bb.0:
|
|
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
|
|
; GFX11-GISEL-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
|
|
; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
|
|
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
|
%bool = icmp eq i32 %bool.arg1, %bool.arg2
|
|
%y = select i1 %bool, bfloat 0xR3480, bfloat 0xR4C00
|
|
%ldexp = fmul bfloat %x, %y
|
|
ret bfloat %ldexp
|
|
}
|
|
|