AMDGPU: Improve fsqrt f64 expansion with ninf (#183695)

This commit is contained in:
Matt Arsenault 2026-03-27 22:25:32 +01:00 committed by GitHub
parent 0f81923735
commit e825f42427
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 132 additions and 183 deletions

View File

@ -6007,11 +6007,15 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI,
auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt);
SqrtRet = B.buildFLdexp(F64, SqrtRet, ScaleDown, Flags);
// TODO: Switch to fcmp oeq 0 for finite only. Can't fully remove this check
// with finite only or nsz because rsq(+/-0) = +/-inf
Register IsZeroOrInf;
if (MI.getFlag(MachineInstr::FmNoInfs)) {
auto ZeroFP = B.buildFConstant(F64, 0.0);
IsZeroOrInf = B.buildFCmp(FCmpInst::FCMP_OEQ, S1, SqrtX, ZeroFP).getReg(0);
} else {
IsZeroOrInf = B.buildIsFPClass(S1, SqrtX, fcZero | fcPosInf).getReg(0);
}
// TODO: Check for DAZ and expand to subnormals
auto IsZeroOrInf = B.buildIsFPClass(LLT::scalar(1), SqrtX, fcZero | fcPosInf);
// If x is +INF, +0, or -0, use its original value
B.buildSelect(Dst, IsZeroOrInf, SqrtX, SqrtRet, Flags);

View File

@ -13459,13 +13459,17 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleDownFactor, ZeroInt);
SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags);
// TODO: Switch to fcmp oeq 0 for finite only. Can't fully remove this check
// with finite only or nsz because rsq(+/-0) = +/-inf
// TODO: Check for DAZ and expand to subnormals
SDValue IsZeroOrInf =
DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, SqrtX,
DAG.getTargetConstant(fcZero | fcPosInf, DL, MVT::i32));
SDValue IsZeroOrInf;
if (Flags.hasNoInfs()) {
SDValue Zero = DAG.getConstantFP(0.0, DL, MVT::f64);
IsZeroOrInf = DAG.getSetCC(DL, MVT::i1, SqrtX, Zero, ISD::SETOEQ);
} else {
IsZeroOrInf =
DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, SqrtX,
DAG.getTargetConstant(fcZero | fcPosInf, DL, MVT::i32));
}
// If x is +INF, +0, or -0, use its original value
return DAG.getNode(ISD::SELECT, DL, MVT::f64, IsZeroOrInf, SqrtX, SqrtRet,

View File

@ -120,8 +120,9 @@ body: |
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -128
; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C1]]
; GCN-NEXT: [[FLDEXP1:%[0-9]+]]:_(s64) = ninf G_FLDEXP [[FMA6]], [[SELECT1]](s32)
; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[FLDEXP]](s64), 608
; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = ninf G_SELECT [[IS_FPCLASS]](s1), [[FLDEXP]], [[FLDEXP1]]
; GCN-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[FLDEXP]](s64), [[C5]]
; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = ninf G_SELECT [[FCMP1]](s1), [[FLDEXP]], [[FLDEXP1]]
; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SELECT2]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = ninf G_FSQRT %0

View File

@ -494,9 +494,8 @@ define double @v_sqrt_f64_ninf(double %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -522,9 +521,8 @@ define double @v_sqrt_f64_ninf(double %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -550,9 +548,8 @@ define double @v_sqrt_f64_ninf(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -578,9 +575,8 @@ define double @v_sqrt_f64_ninf(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -610,9 +606,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true"
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -638,9 +633,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true"
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -666,9 +660,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true"
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -694,9 +687,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true"
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -960,6 +952,7 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GFX6-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@ -969,8 +962,6 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@ -989,6 +980,7 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GFX8-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@ -998,8 +990,6 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@ -1026,9 +1016,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1055,9 +1044,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1214,6 +1202,7 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GFX6-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@ -1223,8 +1212,6 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@ -1243,6 +1230,7 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GFX8-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@ -1252,8 +1240,6 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0x260
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@ -1280,9 +1266,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1309,9 +1294,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1466,9 +1450,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1494,9 +1477,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1522,9 +1504,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1550,9 +1531,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1582,9 +1562,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1610,9 +1589,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1638,9 +1616,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -1666,9 +1643,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2236,9 +2212,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2264,9 +2239,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2292,9 +2266,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2320,9 +2293,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2353,9 +2325,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2381,9 +2352,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2409,9 +2379,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2437,9 +2406,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2482,12 +2450,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; GFX6-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; GFX6-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; GFX6-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
; GFX6-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
@ -2528,12 +2495,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v9, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; GFX8-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; GFX8-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; GFX8-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
; GFX8-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
@ -2575,12 +2541,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; GFX6-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; GFX6-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
; GFX6-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
@ -2622,12 +2587,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; GFX8-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; GFX8-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
; GFX8-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
@ -2659,9 +2623,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2687,9 +2650,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2715,9 +2677,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -2743,9 +2704,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc

View File

@ -4437,7 +4437,6 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@ -4449,7 +4448,7 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@ -4471,7 +4470,6 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@ -4484,7 +4482,7 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@ -4516,9 +4514,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@ -4551,9 +4548,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -4847,7 +4843,6 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@ -4859,7 +4854,7 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@ -4881,7 +4876,6 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@ -4894,7 +4888,7 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@ -4926,9 +4920,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@ -4961,9 +4954,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -5044,7 +5036,6 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@ -5056,7 +5047,7 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@ -5079,7 +5070,6 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
@ -5092,7 +5082,7 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
@ -5124,9 +5114,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@ -5160,9 +5149,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -5243,7 +5231,6 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0x3ff00000
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
@ -5256,7 +5243,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@ -5285,9 +5272,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@ -5299,7 +5285,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
@ -5338,9 +5324,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@ -5377,9 +5362,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@ -5488,35 +5472,34 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v12
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v14, 0xffffff80
; SI-SDAG-CG-NEXT: v_mov_b32_e32 v15, 0x260
; SI-SDAG-CG-NEXT: v_mul_f64 v[6:7], v[2:3], v[4:5]
; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v15, 0, v14, vcc
; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[2:3]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5]
; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v8
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v12, 0, v14, vcc
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
; SI-SDAG-CG-NEXT: v_mul_f64 v[6:7], v[0:1], v[8:9]
; SI-SDAG-CG-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9]
; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[8:9], 0.5
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v12
; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; SI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[8:9], v[12:13], 0.5
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v15
; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], v[12:13], v[6:7], v[12:13]
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[8:9], v[6:7], v[8:9]
; SI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[12:13], v[8:9], v[6:7]
; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[12:13], v[6:7], v[10:11]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[0:1]
; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5]
; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
@ -5547,39 +5530,38 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v12, 0xffffff80
; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v12
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v12, 0xffffff80
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[4:5]
; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
; SI-GISEL-CG-NEXT: v_mov_b32_e32 v13, 0x260
; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[8:9], v[6:7]
; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
; SI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
@ -5631,12 +5613,11 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
; VI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-SDAG-CG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; VI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
; VI-SDAG-CG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
@ -5692,12 +5673,11 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-CG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3]
; VI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9
; VI-GISEL-CG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc