[DAG] isKnownNeverNaN - fallback to computeKnownFPClass check (#189476)
Remove ConstantFPSDNode handling from isKnownNeverNaN and fall back to using computeKnownFPClass if there are no opcode matches in isKnownNeverNaN. The test check changes are due to isKnownNeverNaN not handling UNDEF/POISON, whereas computeKnownFPClass does (POISON in particular now returns isKnownNeverNaN == true, preventing an ISD::FCANONICALIZE call in expandFMINNUM_FMAXNUM).
This commit is contained in:
parent
8b21fe60b4
commit
d74f098a30
@ -6149,12 +6149,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
|
||||
if (Depth >= MaxRecursionDepth)
|
||||
return false; // Limit search depth.
|
||||
|
||||
// If the value is a constant, we can obviously see if it is a NaN or not.
|
||||
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||
return !C->getValueAPF().isNaN() ||
|
||||
(SNaN && !C->getValueAPF().isSignaling());
|
||||
}
|
||||
|
||||
unsigned Opcode = Op.getOpcode();
|
||||
switch (Opcode) {
|
||||
case ISD::FADD:
|
||||
@ -6329,9 +6323,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
|
||||
return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, *this, SNaN,
|
||||
Depth);
|
||||
}
|
||||
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
|
||||
FPClassTest NanMask = SNaN ? fcSNan : fcNan;
|
||||
KnownFPClass Known = computeKnownFPClass(Op, DemandedElts, NanMask, Depth);
|
||||
return Known.isKnownNever(NanMask);
|
||||
}
|
||||
|
||||
bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
|
||||
|
||||
@ -3110,20 +3110,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: flat_load_dword v3, v[0:1]
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
|
||||
; GFX8-NEXT: v_mov_b32_e32 v4, s0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
|
||||
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
|
||||
; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
|
||||
; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
|
||||
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
|
||||
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
|
||||
@ -3939,9 +3939,9 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
|
||||
; GFX8-NEXT: v_max_f16_e32 v2, 0x7e00, v2
|
||||
; GFX8-NEXT: v_max_f16_e32 v2, s0, v2
|
||||
; GFX8-NEXT: v_max_f16_e32 v3, 0, v3
|
||||
; GFX8-NEXT: v_min_f16_e32 v3, 0x7e00, v3
|
||||
; GFX8-NEXT: v_min_f16_e32 v3, s0, v3
|
||||
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
|
||||
; GFX8-NEXT: flat_store_dword v[0:1], v2
|
||||
@ -4029,20 +4029,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: flat_load_dword v3, v[0:1]
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
|
||||
; GFX8-NEXT: v_mov_b32_e32 v4, s0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
|
||||
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
|
||||
; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
|
||||
; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
|
||||
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
|
||||
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
|
||||
|
||||
@ -1081,7 +1081,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
|
||||
; SDAG-GFX1100-TRUE16: ; %bb.0:
|
||||
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0
|
||||
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
@ -1092,14 +1091,13 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
|
||||
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
|
||||
; SDAG-GFX1100-FAKE16: ; %bb.0:
|
||||
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
|
||||
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v6
|
||||
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
|
||||
@ -1107,7 +1105,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
|
||||
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
|
||||
; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
|
||||
; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
|
||||
@ -1118,7 +1115,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
|
||||
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
|
||||
; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
|
||||
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
|
||||
; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user