llvm-project/llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll
Daniil Fukalov b7f6abdd05
[AMDGPU] Try to reuse register with the constant from compare in v_cndmask (#148740)
For some targets, the optimization X == Const ? X : Y -> X == Const ?
Const : Y can cause extra register usage or redundant immediate encoding
for the constant in cndmask generated from the ternary operation.

This patch detects such cases and reuses the register from the compare
instruction that already holds the constant, instead of materializing it
again for cndmask.

The optimization is skipped for immediates that can be encoded directly into
the cndmask instruction (including +-0.0), as well as for !isNormal() constants.

This change is a rework based on #131146.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-07-16 23:18:44 +02:00

1430 lines
58 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX1010 %s
; Test the CMP+SELECT optimization that folds shared constants to reduce
; register pressure.
;------------------------------------------------------------------------------
; F32 Tests
;------------------------------------------------------------------------------
; Should be folded: fcmp oeq + select with the same constant as the true value.
; Expected: v_cndmask reads %arg's register (v0) instead of a second copy of
; the constant, so the constant only appears as the compare operand.
define float @fcmp_select_fold_oeq_f32_imm(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_f32_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_f32_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x40490FDB00000000
%sel = select i1 %cmp, float 0x40490FDB00000000, float %other
ret float %sel
}
; Should be folded: fcmp oeq + select with the same constant as the true value,
; with the constant as the first (left) compare operand.
; Expected: v_cndmask reads %arg's register (v0) instead of a second copy of
; the constant.
define float @fcmp_select_fold_oeq_imm_f32(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_imm_f32:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f32:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float 0x40490FDB00000000, %arg
%sel = select i1 %cmp, float 0x40490FDB00000000, float %other
ret float %sel
}
; Should be folded: fcmp one + select with the same constant as the false value.
; Expected: v_cndmask uses %arg's register (v0) where the constant would
; otherwise go, so no separate copy of the constant is materialized for it.
define float @fcmp_select_fold_one_f32_imm(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_fold_one_f32_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x402df850
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_f32_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float %arg, 0x4005BF0A00000000
%sel = select i1 %cmp, float %other, float 0x4005BF0A00000000
ret float %sel
}
; Should be folded: fcmp one + select with the same constant as the false value,
; with the constant as the first (left) compare operand.
; Expected: v_cndmask uses %arg's register (v0) where the constant would
; otherwise go.
define float @fcmp_select_fold_one_imm_f32(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_fold_one_imm_f32:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x402df850
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_imm_f32:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float 0x4005BF0A00000000, %arg
%sel = select i1 %cmp, float %other, float 0x4005BF0A00000000
ret float %sel
}
; Should NOT be folded: the compare constant and the select constant differ.
; Expected: the select constant is still provided separately (v_mov_b32 into v2
; on gfx900, a literal operand of v_cndmask on gfx1010).
define float @fcmp_select_no_fold_f32_different_const(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_different_const:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_mov_b32_e32 v2, 0x46487ed8
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_different_const:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x46487ed8, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x40490FDB00000000
%sel = select i1 %cmp, float 0x40C90FDB00000000, float %other
ret float %sel
}
; Should NOT be folded: fcmp oeq with the constant as the select's false value
; (the constant is selected when %arg is NOT equal to it, so %arg cannot stand
; in for it).
; Expected: the select constant is still provided separately (v_mov_b32 into v2
; on gfx900, a literal operand of v_cndmask on gfx1010).
define float @fcmp_select_no_fold_f32_other_pos(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_other_pos:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_other_pos:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x40490FDB00000000
%sel = select i1 %cmp, float %other, float 0x40490FDB00000000
ret float %sel
}
; Should NOT be folded: unsupported comparison type (fcmp olt is not an
; equality/inequality compare, so the result does not imply %arg == constant).
; Expected: the select constant is still provided separately (v_mov_b32 into v2
; on gfx900, a literal operand of v_cndmask on gfx1010).
define float @fcmp_select_no_fold_f32_unsupported_cmp(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_unsupported_cmp:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8
; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_unsupported_cmp:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp olt float %arg, 0x40490FDB00000000
%sel = select i1 %cmp, float %other, float 0x40490FDB00000000
ret float %sel
}
; Should NOT be folded: imm can be encoded into cndmask.
; Expected: 1.0 appears directly as an operand of both v_cmp and v_cndmask on
; both targets, so no register holds the constant.
define float @fcmp_select_no_fold_f32_enc_imm(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 1.0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 1.0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 1.0
%sel = select i1 %cmp, float 1.0, float %other
ret float %sel
}
; Should NOT be folded: imm can be encoded into cndmask (fcmp one variant with
; the constant as the first compare operand and the select's false value).
; Expected: -4.0 appears directly as an operand of both v_cmp and v_cndmask.
define float @fcmp_select_no_fold_f32_enc_imm_2(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm_2:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, -4.0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm_2:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, -4.0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float -4.0, %arg
%sel = select i1 %cmp, float %other, float -4.0
ret float %sel
}
; Should NOT be folded: fcmp oeq with zero constant.
; Expected: 0 appears directly as an operand of both v_cmp and v_cndmask.
define float @fcmp_select_no_fold_oeq_f32_zero(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_zero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_zero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0.0
%sel = select i1 %cmp, float 0.0, float %other
ret float %sel
}
; Should NOT be folded: fcmp one with negative zero constant.
; Expected: the select keeps its own copy of -0.0 (v_bfrev_b32 into v2 on
; gfx900, literal 0x80000000 operand of v_cndmask on gfx1010).
define float @fcmp_select_no_fold_one_f32_negzero(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_f32_negzero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_brev_b32 s4, 1
; GFX900-NEXT: v_bfrev_b32_e32 v2, 1
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_f32_negzero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x80000000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float -0.0, %arg ; 0x8000000000000000
%sel = select i1 %cmp, float %other, float -0.0 ; 0x8000000000000000
ret float %sel
}
; NaN values should bypass the optimization due to special IEEE 754 behavior.
; fcmp oeq with NaN always returns false, so select always chooses %other.
; Expected: no compare is emitted at all; the code is just a move of %other
; (v1) into the return register v0.
define float @fcmp_select_no_fold_oeq_f32_nan(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_nan:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_nan:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, v1
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x7FF8000000000000
%sel = select i1 %cmp, float 0x7FF8000000000000, float %other
ret float %sel
}
; NaN values should bypass the optimization due to special IEEE 754 behavior.
; fcmp one is an ordered compare, so with a NaN operand it always returns
; false and the select always chooses its false value, the NaN constant.
; Expected: no compare is emitted; the NaN bit pattern 0x7fc00000 is moved
; straight into the return register v0.
define float @fcmp_select_no_fold_one_f32_nan(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_f32_nan:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_f32_nan:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float 0x7FF8000000000000, %arg
%sel = select i1 %cmp, float %other, float 0x7FF8000000000000
ret float %sel
}
; Should NOT be folded: fcmp oeq with positive infinity.
; Infinity values should bypass the optimization, generating unfolded code.
; Expected: the select keeps its own copy of +inf (v_mov_b32 into v2 on gfx900,
; literal 0x7f800000 operand of v_cndmask on gfx1010).
define float @fcmp_select_no_fold_posinf_oeq_f32(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_oeq_f32:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x7f800000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_oeq_f32:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x7f800000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x7FF0000000000000
%sel = select i1 %cmp, float 0x7FF0000000000000, float %other
ret float %sel
}
; Should NOT be folded: fcmp one with negative infinity.
; Infinity values should bypass the optimization, generating unfolded code.
; Expected: the select keeps its own copy of -inf (v_mov_b32 into v2 on gfx900,
; literal 0xff800000 operand of v_cndmask on gfx1010).
define float @fcmp_select_no_fold_neginf_f32_one(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_f32_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0xff800000
; GFX900-NEXT: v_mov_b32_e32 v2, 0xff800000
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_f32_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xff800000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float 0xFFF0000000000000, %arg
%sel = select i1 %cmp, float %other, float 0xFFF0000000000000
ret float %sel
}
;------------------------------------------------------------------------------
; F64 Tests
;------------------------------------------------------------------------------
; Should be folded: f64 fcmp oeq + select with the same constant as the true
; value.
; Expected: both v_cndmask halves read %arg's registers (v0, v1) instead of
; separately materialized constant halves; the constant lives only in s[4:5]
; for the compare.
define double @fcmp_select_fold_oeq_f64_imm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_f64_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_f64_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 3.141592653589793
%sel = select i1 %cmp, double 3.141592653589793, double %other
ret double %sel
}
; Should be folded: f64 fcmp oeq + select with the same constant as the true
; value, with the constant as the first (left) compare operand.
; Expected: both v_cndmask halves read %arg's registers (v0, v1) instead of
; separately materialized constant halves.
define double @fcmp_select_fold_oeq_imm_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_imm_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double 3.141592653589793, %arg
%sel = select i1 %cmp, double 3.141592653589793, double %other
ret double %sel
}
; Should be folded: f64 fcmp one + select with the same constant as the false
; value.
; Expected: both v_cndmask halves use %arg's registers (v0, v1) where the
; constant halves would otherwise go.
define double @fcmp_select_fold_one_f64_imm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_fold_one_f64_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x8b145769
; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a
; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_f64_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769
; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a
; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double %arg, 2.718281828459045
%sel = select i1 %cmp, double %other, double 2.718281828459045
ret double %sel
}
; Should be folded: f64 fcmp one + select with the same constant as the false
; value, with the constant as the first (left) compare operand.
; Expected: both v_cndmask halves use %arg's registers (v0, v1) where the
; constant halves would otherwise go.
define double @fcmp_select_fold_one_imm_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_fold_one_imm_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x8b145769
; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a
; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_imm_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769
; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a
; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double 2.718281828459045, %arg
%sel = select i1 %cmp, double %other, double 2.718281828459045
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with the constant as the select's false
; value (the constant is selected when %arg is NOT equal to it).
; Expected: the select's constant halves are still provided separately
; (v_mov_b32 on gfx900, literal v_cndmask operands on gfx1010).
define double @fcmp_select_no_fold_f64_other_pos(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_other_pos:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18
; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb
; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_other_pos:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 3.141592653589793
%sel = select i1 %cmp, double %other, double 3.141592653589793
ret double %sel
}
; Should NOT be folded: f64 fcmp unsupported comparison type (fcmp olt is not
; an equality/inequality compare).
; Expected: the select's constant halves are still provided separately
; (v_mov_b32 on gfx900, literal v_cndmask operands on gfx1010).
define double @fcmp_select_no_fold_f64_unsupported_cmp(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_unsupported_cmp:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18
; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb
; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_unsupported_cmp:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_gt_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp olt double %arg, 3.141592653589793
%sel = select i1 %cmp, double %other, double 3.141592653589793
ret double %sel
}
; Should NOT be folded: imm can be encoded into cndmask.
; Expected: 1.0 is encoded directly in the f64 compare; the select still uses
; its own constant halves (0 for the low half, 0x3ff00000 for the high half).
define double @fcmp_select_no_fold_f64_enc_imm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 1.0, v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0x3ff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x3ff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 1.0
%sel = select i1 %cmp, double 1.0, double %other
ret double %sel
}
; Should NOT be folded: imm can be encoded into cndmask (fcmp one variant with
; the constant as the first compare operand and the select's false value).
; Expected: -4.0 is encoded directly in the f64 compare; the select still uses
; its own constant halves (0 for the low half, 0xc0100000 for the high half).
define double @fcmp_select_no_fold_f64_enc_imm_2(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm_2:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, -4.0, v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0xc0100000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm_2:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, -4.0, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xc0100000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double -4.0, %arg
%sel = select i1 %cmp, double %other, double -4.0
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with zero constant.
; Expected: 0 appears directly as an operand of the compare and of both
; v_cndmask halves.
define double @fcmp_select_no_fold_oeq_f64_zero(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_f64_zero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 0, v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_f64_zero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 0.0
%sel = select i1 %cmp, double 0.0, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp one with negative zero constant.
; Expected: the select still uses its own constant halves (0 for the low half;
; the high half via v_bfrev_b32 on gfx900 or literal 0x80000000 on gfx1010).
define double @fcmp_select_no_fold_one_f64_negzero(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_f64_negzero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_brev_b32 s5, 1
; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_bfrev_b32_e32 v1, 1
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_f64_negzero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, 0x80000000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x80000000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double -0.0, %arg
%sel = select i1 %cmp, double %other, double -0.0
ret double %sel
}
; Should NOT be folded: f64 compare constant and select constant differ.
; Expected: the select's constant halves are still provided separately
; (v_mov_b32 on gfx900, literal v_cndmask operands on gfx1010).
define double @fcmp_select_no_fold_f64_different_const(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_different_const:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v4, 0x8b145769
; GFX900-NEXT: v_mov_b32_e32 v1, 0x4005bf0a
; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_different_const:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8b145769, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x4005bf0a, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 3.141592653589793
%sel = select i1 %cmp, double 2.718281828459045, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with NaN constant.
; fcmp oeq with NaN always returns false, so select always chooses %other.
; Expected: no compare is emitted; %other (v[2:3]) is simply moved into the
; return registers v[0:1].
define double @fcmp_select_no_fold_nan_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v1, v3
; GFX1010-NEXT: v_mov_b32_e32 v0, v2
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 0x7FF8000000000000
%sel = select i1 %cmp, double 0x7FF8000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with NaN constant (commuted variant: the
; constant is the first compare operand).
; fcmp oeq with NaN always returns false, so select always chooses %other.
; Expected: no compare is emitted; %other (v[2:3]) is simply moved into the
; return registers v[0:1].
define double @fcmp_select_no_fold_nan_f64_comm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f64_comm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_comm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v1, v3
; GFX1010-NEXT: v_mov_b32_e32 v0, v2
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double 0x7FF8000000000000, %arg
%sel = select i1 %cmp, double 0x7FF8000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp one with NaN constant.
; fcmp one is an ordered compare, so with a NaN operand it always returns
; false and the select always chooses its false value, the NaN constant.
; Expected: no compare is emitted; the NaN halves (0 and 0x7ff80000) are moved
; straight into the return registers v[0:1].
define double @fcmp_select_no_fold_nan_f64_one(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double %arg, 0x7FF8000000000000
%sel = select i1 %cmp, double %other, double 0x7FF8000000000000
ret double %sel
}
; Should NOT be folded: f64 fcmp one with NaN constant (commuted variant: the
; constant is the first compare operand).
; fcmp one is an ordered compare, so with a NaN operand it always returns
; false and the select always chooses its false value, the NaN constant.
; Expected: no compare is emitted; the NaN halves (0 and 0x7ff80000) are moved
; straight into the return registers v[0:1].
define double @fcmp_select_no_fold_nan_f64_one_comm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one_comm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one_comm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double 0x7FF8000000000000, %arg
%sel = select i1 %cmp, double %other, double 0x7FF8000000000000
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with positive infinity.
; Infinity values should bypass the optimization, generating unfolded code.
; Expected: the select still uses its own constant halves (0 for the low half,
; 0x7ff00000 for the high half).
define double @fcmp_select_no_fold_posinf_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 0x7FF0000000000000
%sel = select i1 %cmp, double 0x7FF0000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with negative infinity.
; Infinity values should bypass the optimization, generating unfolded code.
; Expected: the select still uses its own constant halves (0 for the low half,
; 0xfff00000 for the high half).
define double @fcmp_select_no_fold_neginf_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_mov_b32 s5, 0xfff00000
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 0xFFF0000000000000
%sel = select i1 %cmp, double 0xFFF0000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with positive infinity (commuted variant:
; the constant is the first compare operand).
; Infinity values should bypass the optimization, generating unfolded code.
; Expected: the select still uses its own constant halves (0 for the low half,
; 0x7ff00000 for the high half).
define double @fcmp_select_no_fold_posinf_f64_comm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_f64_comm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64_comm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double 0x7FF0000000000000, %arg
%sel = select i1 %cmp, double 0x7FF0000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq against -Inf (0xFFF0000000000000) with the
; compare operands commuted (constant on the left-hand side).
; Infinity is not a normal FP value and the optimization skips !isNormal()
; constants, so the checks still feed the constant's high dword (0xfff00000)
; to v_cndmask instead of reusing %arg.
define double @fcmp_select_no_fold_neginf_f64_comm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_f64_comm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_mov_b32 s5, 0xfff00000
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64_comm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double 0xFFF0000000000000, %arg
%sel = select i1 %cmp, double 0xFFF0000000000000, double %other
ret double %sel
}
;------------------------------------------------------------------------------
; F16 Tests
;------------------------------------------------------------------------------
; Should be folded: f16 fcmp oeq against 0xH4248 with the same constant as the
; select's true value.
; The checks expect v_cndmask to pick v0 (%arg) on the true path, so the
; constant only appears as the compare operand and is not materialized a
; second time for the select.
define half @fcmp_select_fold_oeq_f16_imm(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_f16_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_f16_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH4248
%sel = select i1 %cmp, half 0xH4248, half %other
ret half %sel
}
; Should be folded: f16 fcmp oeq against 0xH4248 with the compare operands
; commuted (constant on the left-hand side) and the same constant as the
; select's true value.
; The expected code is identical to the non-commuted variant: v_cndmask picks
; v0 (%arg) on the true path instead of a separately materialized constant.
define half @fcmp_select_fold_oeq_imm_f16(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_imm_f16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half 0xH4248, %arg
%sel = select i1 %cmp, half 0xH4248, half %other
ret half %sel
}
; Should be folded: f16 fcmp one against 0xH4020 with the same constant as the
; select's false value.
; The checks expect v_cndmask to pick v0 (%arg) on the false path instead of a
; separately materialized constant.
; NOTE(review): `one` is also false when %arg is NaN; in that case the IR select
; yields 0xH4020 while the expected code returns %arg - confirm this is intended.
define half @fcmp_select_fold_one_f16_imm(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_fold_one_f16_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4020
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_f16_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half %arg, 0xH4020
%sel = select i1 %cmp, half %other, half 0xH4020
ret half %sel
}
; Should be folded: f16 fcmp one against 0xH4020 with the compare operands
; commuted (constant on the left-hand side) and the same constant as the
; select's false value.
; The expected code is identical to the non-commuted variant: v_cndmask picks
; v0 (%arg) on the false path instead of a separately materialized constant.
; NOTE(review): `one` is also false when %arg is NaN; in that case the IR select
; yields 0xH4020 while the expected code returns %arg - confirm this is intended.
define half @fcmp_select_fold_one_imm_f16(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_fold_one_imm_f16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4020
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_imm_f16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half 0xH4020, %arg
%sel = select i1 %cmp, half %other, half 0xH4020
ret half %sel
}
; Should NOT be folded: the compare constant (0xH4248) and the select's true
; value (0xH4300) are different constants, so there is nothing to share.
; The checks expect 0x4300 to be supplied to v_cndmask separately from the
; 0x4248 used by the compare.
define half @fcmp_select_no_fold_f16_different_const(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_f16_different_const:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300
; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f16_different_const:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH4248
%sel = select i1 %cmp, half 0xH4300, half %other
ret half %sel
}
; Should NOT be folded: f16 fcmp oeq against a NaN constant (0xH7e00).
; An ordered compare against NaN is always false, so the select always yields
; %other; the checks expect a plain copy of v1 with no v_cmp/v_cndmask at all.
define half @fcmp_select_no_fold_nan_f16(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, v1
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH7e00
%sel = select i1 %cmp, half 0xH7e00, half %other
ret half %sel
}
; Should NOT be folded: f16 fcmp one against a NaN constant (0xH7e00).
; An ordered compare against NaN is always false, so the select always yields
; its false value; the checks expect the constant 0x7e00 to be returned
; directly with no v_cmp/v_cndmask at all.
define half @fcmp_select_no_fold_nan_f16_one(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0x7e00
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7e00
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half %arg, 0xH7e00
%sel = select i1 %cmp, half %other, half 0xH7e00
ret half %sel
}
; Should NOT be folded: f16 fcmp one against +Inf (0xH7c00) with the same
; constant as the select's false value.
; Infinity is not a normal FP value and the optimization skips !isNormal()
; constants, so the checks still supply 0x7c00 to v_cndmask (via v2 on GFX900,
; as a literal on GFX1010) instead of reusing %arg.
define half @fcmp_select_no_fold_posinf_f16_one(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_f16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x7c00
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_f16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x7c00, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half %arg, 0xH7c00
%sel = select i1 %cmp, half %other, half 0xH7c00
ret half %sel
}
; Should NOT be folded: f16 fcmp one against -Inf (0xHfc00) with the same
; constant as the select's false value.
; Infinity is not a normal FP value and the optimization skips !isNormal()
; constants, so the checks still supply 0xfc00 to v_cndmask (via v2 on GFX900,
; as a literal on GFX1010) instead of reusing %arg.
define half @fcmp_select_no_fold_neginf_f16_one(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_f16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0xfc00
; GFX900-NEXT: v_mov_b32_e32 v2, 0xfc00
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_f16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0xfc00, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xfc00, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half %arg, 0xHfc00
%sel = select i1 %cmp, half %other, half 0xHfc00
ret half %sel
}
; Should NOT be folded: f16 fcmp oeq against +0.0 (0xH0000) with the same
; constant as the select's true value.
; The optimization leaves +-0.0 alone; the checks show 0 encoded directly as an
; operand of both v_cmp and v_cndmask, so no extra register is needed anyway.
define half @fcmp_select_no_fold_oeq_f16_zero(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_f16_zero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, 0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_f16_zero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH0000
%sel = select i1 %cmp, half 0xH0000, half %other
ret half %sel
}
; Should NOT be folded: f16 fcmp one against -0.0 (0xH8000) with the compare
; operands commuted (constant on the left-hand side) and the same constant as
; the select's false value.
; The optimization leaves +-0.0 alone (+0.0 also compares equal to -0.0, so
; %arg need not match the constant bit-for-bit); the checks still supply 0x8000
; to v_cndmask (via v2 on GFX900, as a literal on GFX1010).
define half @fcmp_select_no_fold_one_f16_negzero(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_f16_negzero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x8000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x8000
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_f16_negzero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x8000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half 0xH8000, %arg
%sel = select i1 %cmp, half %other, half 0xH8000
ret half %sel
}
; Should NOT be folded: f16 fcmp oeq against 0xH4248, but the constant is the
; select's FALSE value.
; The constant is chosen exactly when the equality compare fails, so %arg does
; not hold the constant on that path and cannot replace it; the checks still
; supply 0x4248 to v_cndmask (via v2 on GFX900, as a literal on GFX1010).
define half @fcmp_select_no_fold_f16_other_pos(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_f16_other_pos:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f16_other_pos:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH4248
%sel = select i1 %cmp, half %other, half 0xH4248
ret half %sel
}
; Should NOT be folded: f16 fcmp olt against 0xH4248 with the same constant as
; the select's false value.
; olt is not an equality predicate, so neither outcome of the compare implies
; that %arg equals the constant; the checks still supply 0x4248 to v_cndmask
; (via v2 on GFX900, as a literal on GFX1010).
define half @fcmp_select_no_fold_f16_unsupported_cmp(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_f16_unsupported_cmp:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
; GFX900-NEXT: v_cmp_gt_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f16_unsupported_cmp:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp olt half %arg, 0xH4248
%sel = select i1 %cmp, half %other, half 0xH4248
ret half %sel
}
;------------------------------------------------------------------------------
; BF16 Tests
;------------------------------------------------------------------------------
; Should be folded: bfloat fcmp oeq against 0xR4248 with the same constant as
; the select's true value.
; In the expected code the compare is done as f32 on a copy of %arg shifted
; left by 16 (v2), leaving v0 intact so v_cndmask can pick %arg on the true
; path instead of a separately materialized constant.
define bfloat @fcmp_select_fold_oeq_bf16_imm(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_bf16_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_bf16_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR4248
%sel = select i1 %cmp, bfloat 0xR4248, bfloat %other
ret bfloat %sel
}
; Should be folded: bfloat fcmp oeq against 0xR4248 with the compare operands
; commuted (constant on the left-hand side) and the same constant as the
; select's true value.
; The expected code is identical to the non-commuted variant: the compare runs
; as f32 on a shifted copy of %arg (v2) and v_cndmask picks the untouched v0 on
; the true path.
define bfloat @fcmp_select_fold_oeq_imm_bf16(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_imm_bf16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_imm_bf16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat 0xR4248, %arg
%sel = select i1 %cmp, bfloat 0xR4248, bfloat %other
ret bfloat %sel
}
; Should be folded: bfloat fcmp one against 0xR4020 with the same constant as
; the select's false value.
; In the expected code the compare is done as f32 on a copy of %arg shifted
; left by 16 (v2), leaving v0 intact so v_cndmask can pick %arg on the false
; path instead of a separately materialized constant.
; NOTE(review): `one` is also false when %arg is NaN; in that case the IR select
; yields 0xR4020 while the expected code returns %arg - confirm this is intended.
define bfloat @fcmp_select_fold_one_bf16_imm(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_fold_one_bf16_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x40200000
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_bf16_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat %arg, 0xR4020
%sel = select i1 %cmp, bfloat %other, bfloat 0xR4020
ret bfloat %sel
}
; Should be folded: bfloat fcmp one against 0xR4020 with the compare operands
; commuted (constant on the left-hand side) and the same constant as the
; select's false value.
; The expected code is identical to the non-commuted variant: the compare runs
; as f32 on a shifted copy of %arg (v2) and v_cndmask picks the untouched v0 on
; the false path.
; NOTE(review): `one` is also false when %arg is NaN; in that case the IR select
; yields 0xR4020 while the expected code returns %arg - confirm this is intended.
define bfloat @fcmp_select_fold_one_imm_bf16(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_fold_one_imm_bf16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x40200000
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_imm_bf16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat 0xR4020, %arg
%sel = select i1 %cmp, bfloat %other, bfloat 0xR4020
ret bfloat %sel
}
; Should NOT be folded: the compare constant (0xR4248) and the select's true
; value (0xR4300) are different constants, so there is nothing to share.
; The checks expect 0x4300 to be supplied to v_cndmask separately from the
; 0x42480000 used by the (f32-widened) compare.
define bfloat @fcmp_select_no_fold_bf16_different_const(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_bf16_different_const:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_bf16_different_const:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42480000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR4248
%sel = select i1 %cmp, bfloat 0xR4300, bfloat %other
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp oeq against a NaN constant (0xR7FC0).
; An ordered compare against NaN is always false, so the select always yields
; %other; the checks expect a plain copy of v1 with no v_cmp/v_cndmask at all.
define bfloat @fcmp_select_no_fold_nan_bf16(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_bf16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, v1
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR7FC0
%sel = select i1 %cmp, bfloat 0xR7FC0, bfloat %other
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp one against a NaN constant (0xR7FC0).
; An ordered compare against NaN is always false, so the select always yields
; its false value; the checks expect the constant 0x7fc0 to be returned
; directly with no v_cmp/v_cndmask at all.
define bfloat @fcmp_select_no_fold_nan_bf16_one(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_bf16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc0
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat %arg, 0xR7FC0
%sel = select i1 %cmp, bfloat %other, bfloat 0xR7FC0
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp one against +Inf (0xR7F80) with the same
; constant as the select's false value.
; Infinity is not a normal FP value and the optimization skips !isNormal()
; constants, so the checks still supply 0x7f80 to v_cndmask (via v2 on GFX900,
; as a literal on GFX1010) instead of reusing %arg.
define bfloat @fcmp_select_no_fold_posinf_bf16_one(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_bf16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x7f800000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f80
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_bf16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x7f800000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f80, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat %arg, 0xR7F80
%sel = select i1 %cmp, bfloat %other, bfloat 0xR7F80
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp one against -Inf (0xRFF80) with the same
; constant as the select's false value.
; Infinity is not a normal FP value and the optimization skips !isNormal()
; constants, so the checks still supply the constant to v_cndmask instead of
; reusing %arg. The select operand appears as 0xffffff80 in the expected
; output (the 16-bit pattern 0xFF80 with the upper bits set).
define bfloat @fcmp_select_no_fold_neginf_bf16_one(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_bf16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0xff800000
; GFX900-NEXT: v_mov_b32_e32 v2, 0xffffff80
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_bf16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffffff80, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat %arg, 0xRFF80
%sel = select i1 %cmp, bfloat %other, bfloat 0xRFF80
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp oeq against +0.0 (0xR0000) with the same
; constant as the select's true value.
; The optimization leaves +-0.0 alone; the checks show 0 encoded directly as an
; operand of both v_cmp and v_cndmask, so no extra register is needed anyway.
define bfloat @fcmp_select_no_fold_oeq_bf16_zero(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_bf16_zero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_bf16_zero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR0000
%sel = select i1 %cmp, bfloat 0xR0000, bfloat %other
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp one against -0.0 (0xR8000) with the compare
; operands commuted (constant on the left-hand side) and the same constant as
; the select's false value.
; The optimization leaves +-0.0 alone (+0.0 also compares equal to -0.0, so
; %arg need not match the constant bit-for-bit); the checks still supply the
; constant to v_cndmask, where it appears as 0xffff8000.
define bfloat @fcmp_select_no_fold_one_bf16_negzero(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_bf16_negzero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_brev_b32 s4, 1
; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff8000
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_bf16_negzero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffff8000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat 0xR8000, %arg
%sel = select i1 %cmp, bfloat %other, bfloat 0xR8000
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp oeq against 0xR4248, but the constant is
; the select's FALSE value.
; The constant is chosen exactly when the equality compare fails, so %arg does
; not hold the constant on that path and cannot replace it; the checks still
; supply 0x4248 to v_cndmask (via v2 on GFX900, as a literal on GFX1010).
define bfloat @fcmp_select_no_fold_bf16_other_pos(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_bf16_other_pos:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_bf16_other_pos:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR4248
%sel = select i1 %cmp, bfloat %other, bfloat 0xR4248
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp olt against 0xR4248 with the same constant
; as the select's false value.
; olt is not an equality predicate, so neither outcome of the compare implies
; that %arg equals the constant; the checks still supply 0x4248 to v_cndmask
; (via v2 on GFX900, as a literal on GFX1010).
define bfloat @fcmp_select_no_fold_bf16_unsupported_cmp(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42480000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp olt bfloat %arg, 0xR4248
%sel = select i1 %cmp, bfloat %other, bfloat 0xR4248
ret bfloat %sel
}