From e825f424270adfb42767d8cb452da61b7ebd9e31 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 27 Mar 2026 22:25:32 +0100 Subject: [PATCH] AMDGPU: Improve fsqrt f64 expansion with ninf (#183695) --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 +- .../AMDGPU/GlobalISel/legalize-fsqrt.mir | 5 +- llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll | 144 +++++++----------- llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 140 ++++++++--------- 5 files changed, 132 insertions(+), 183 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 8b1114f59cd6..e245fa16bdfd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -6007,11 +6007,15 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI, auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt); SqrtRet = B.buildFLdexp(F64, SqrtRet, ScaleDown, Flags); - // TODO: Switch to fcmp oeq 0 for finite only. Can't fully remove this check - // with finite only or nsz because rsq(+/-0) = +/-inf + Register IsZeroOrInf; + if (MI.getFlag(MachineInstr::FmNoInfs)) { + auto ZeroFP = B.buildFConstant(F64, 0.0); + IsZeroOrInf = B.buildFCmp(FCmpInst::FCMP_OEQ, S1, SqrtX, ZeroFP).getReg(0); + } else { + IsZeroOrInf = B.buildIsFPClass(S1, SqrtX, fcZero | fcPosInf).getReg(0); + } // TODO: Check for DAZ and expand to subnormals - auto IsZeroOrInf = B.buildIsFPClass(LLT::scalar(1), SqrtX, fcZero | fcPosInf); // If x is +INF, +0, or -0, use its original value B.buildSelect(Dst, IsZeroOrInf, SqrtX, SqrtRet, Flags); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 39d1e762ac08..81a12f0dcf93 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -13459,13 +13459,17 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleDownFactor, ZeroInt); SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags); - // TODO: Switch to fcmp oeq 0 for finite only. Can't fully remove this check - // with finite only or nsz because rsq(+/-0) = +/-inf - // TODO: Check for DAZ and expand to subnormals - SDValue IsZeroOrInf = - DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, SqrtX, - DAG.getTargetConstant(fcZero | fcPosInf, DL, MVT::i32)); + + SDValue IsZeroOrInf; + if (Flags.hasNoInfs()) { + SDValue Zero = DAG.getConstantFP(0.0, DL, MVT::f64); + IsZeroOrInf = DAG.getSetCC(DL, MVT::i1, SqrtX, Zero, ISD::SETOEQ); + } else { + IsZeroOrInf = + DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, SqrtX, + DAG.getTargetConstant(fcZero | fcPosInf, DL, MVT::i32)); + } // If x is +INF, +0, or -0, use its original value return DAG.getNode(ISD::SELECT, DL, MVT::f64, IsZeroOrInf, SqrtX, SqrtRet, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir index 0ed4865e8d51..f0df58966046 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -120,8 +120,9 @@ body: | ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -128 ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C1]] ; GCN-NEXT: [[FLDEXP1:%[0-9]+]]:_(s64) = ninf G_FLDEXP [[FMA6]], [[SELECT1]](s32) - ; GCN-NEXT: [[IS_FPCLASS:%[0-9]+]]:_(s1) = G_IS_FPCLASS [[FLDEXP]](s64), 608 - ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = ninf G_SELECT [[IS_FPCLASS]](s1), [[FLDEXP]], [[FLDEXP1]] + ; GCN-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GCN-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[FLDEXP]](s64), [[C5]] + ; GCN-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = ninf G_SELECT [[FCMP1]](s1), [[FLDEXP]], [[FLDEXP1]] ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SELECT2]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = ninf G_FSQRT %0 diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll index 9b979816b95b..7cdf08800cb2 100644 --- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll @@ -494,9 +494,8 @@ define double @v_sqrt_f64_ninf(double %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -522,9 +521,8 @@ define double @v_sqrt_f64_ninf(double %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -550,9 +548,8 @@ define double @v_sqrt_f64_ninf(double %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -578,9 +575,8 @@ define double @v_sqrt_f64_ninf(double %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -610,9 +606,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true" ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -638,9 +633,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true" ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -666,9 +660,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true" ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -694,9 +687,8 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true" ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -960,6 +952,7 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) { ; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GFX6-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0 ; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5 @@ -969,8 +962,6 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] -; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0x260 -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc @@ -989,6 +980,7 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) { ; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GFX8-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0 ; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5 @@ -998,8 +990,6 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] -; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0x260 -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc @@ -1026,9 +1016,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1055,9 +1044,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1214,6 +1202,7 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { ; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GFX6-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0 ; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5 @@ -1223,8 +1212,6 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] -; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0x260 -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc @@ -1243,6 +1230,7 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { ; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GFX8-SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0 ; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5 @@ -1252,8 +1240,6 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] -; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0x260 -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4 ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc @@ -1280,9 +1266,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1309,9 +1294,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1466,9 +1450,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1494,9 +1477,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1522,9 +1504,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1550,9 +1531,8 @@ define double @v_sqrt_f64_nnan_ninf(double %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1582,9 +1562,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1610,9 +1589,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1638,9 +1616,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -1666,9 +1643,8 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2236,9 +2212,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2264,9 +2239,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2292,9 +2266,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2320,9 +2293,8 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2353,9 +2325,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2381,9 +2352,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2409,9 +2379,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2437,9 +2406,8 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2482,12 +2450,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9] ; GFX6-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11] -; GFX6-SDAG-NEXT: v_mov_b32_e32 v9, 0x260 -; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5] -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 -; GFX6-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9 -; GFX6-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10 +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3] +; GFX6-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9 ; GFX6-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc @@ -2528,12 +2495,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11] -; GFX8-SDAG-NEXT: v_mov_b32_e32 v9, 0x260 -; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc +; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5] -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 -; GFX8-SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9 -; GFX8-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10 +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3] +; GFX8-SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9 ; GFX8-SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc @@ -2575,12 +2541,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7] -; GFX6-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 -; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5] -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 -; GFX6-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9 -; GFX6-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3] +; GFX6-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9 ; GFX6-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc @@ -2622,12 +2587,11 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7] -; GFX8-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 -; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc +; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5] -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 -; GFX8-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9 -; GFX8-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3] +; GFX8-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9 ; GFX8-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc @@ -2659,9 +2623,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) { ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2687,9 +2650,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) { ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-SDAG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2715,9 +2677,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) { ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX6-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -2743,9 +2704,8 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) { ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; GFX8-GISEL-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll index 43bfe73515ad..750ab22aa9a5 100644 --- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll @@ -4437,7 +4437,6 @@ define double @v_rsq_f64__afn_ninf(double %x) { ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] -; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260 ; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 ; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5 @@ -4449,7 +4448,7 @@ define double @v_rsq_f64__afn_ninf(double %x) { ; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 +; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] @@ -4471,7 +4470,6 @@ define double @v_rsq_f64__afn_ninf(double %x) { ; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260 ; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -4484,7 +4482,7 @@ define double @v_rsq_f64__afn_ninf(double %x) { ; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 +; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] @@ -4516,9 +4514,8 @@ define double @v_rsq_f64__afn_ninf(double %x) { ; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc @@ -4551,9 +4548,8 @@ define double @v_rsq_f64__afn_ninf(double %x) { ; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4847,7 +4843,6 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) { ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] -; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260 ; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 ; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5 @@ -4859,7 +4854,7 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) { ; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 +; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] @@ -4881,7 +4876,6 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) { ; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260 ; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -4894,7 +4888,7 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) { ; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 +; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] @@ -4926,9 +4920,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) { ; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc @@ -4961,9 +4954,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) { ; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -5044,7 +5036,6 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) { ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] -; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260 ; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 ; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5 @@ -5056,7 +5047,7 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) { ; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 +; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] @@ -5079,7 +5070,6 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) { ; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260 ; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] @@ -5092,7 +5082,7 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) { ; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 +; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] @@ -5124,9 +5114,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) { ; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc @@ -5160,9 +5149,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) { ; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -5243,7 +5231,6 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] -; SI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260 ; SI-SDAG-CG-NEXT: s_mov_b32 s6, 0x3ff00000 ; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; SI-SDAG-CG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 @@ -5256,7 +5243,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; SI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 +; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-SDAG-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 @@ -5285,9 +5272,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; SI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260 -; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-CG-NEXT: v_mov_b32_e32 v10, 0x3ff00000 +; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-CG-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-CG-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] ; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5 @@ -5299,7 +5285,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; SI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 +; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-GISEL-CG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 @@ -5338,9 +5324,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] ; VI-SDAG-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5] ; VI-SDAG-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; VI-SDAG-CG-NEXT: v_mov_b32_e32 v5, 0x260 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; VI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc @@ -5377,9 +5362,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1] ; VI-GISEL-CG-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3] ; VI-GISEL-CG-NEXT: v_mov_b32_e32 v4, 0xffffff80 -; VI-GISEL-CG-NEXT: v_mov_b32_e32 v5, 0x260 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5 +; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; VI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -5488,35 +5472,34 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) { ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; SI-SDAG-CG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1] ; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[4:5], v[2:3] +; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5] +; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v12 ; SI-SDAG-CG-NEXT: v_mov_b32_e32 v14, 0xffffff80 -; SI-SDAG-CG-NEXT: v_mov_b32_e32 v15, 0x260 ; SI-SDAG-CG-NEXT: v_mul_f64 v[6:7], v[2:3], v[4:5] ; SI-SDAG-CG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5 +; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v15, 0, v14, vcc ; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5 +; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[2:3] ; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] ; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] -; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5] ; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3] -; SI-SDAG-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v8 -; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7] ; SI-SDAG-CG-NEXT: v_rsq_f64_e32 v[8:9], v[0:1] +; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7] ; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3] -; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v12, 0, v14, vcc -; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7] -; SI-SDAG-CG-NEXT: v_mul_f64 v[6:7], v[0:1], v[8:9] +; SI-SDAG-CG-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9] ; SI-SDAG-CG-NEXT: v_mul_f64 v[8:9], v[8:9], 0.5 -; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v12 -; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5 -; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v15 -; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] -; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] -; SI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1] +; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7] +; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], -v[8:9], v[12:13], 0.5 +; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v15 +; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], v[12:13], v[6:7], v[12:13] +; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[8:9], v[6:7], v[8:9] +; SI-SDAG-CG-NEXT: v_fma_f64 v[12:13], -v[10:11], v[10:11], v[0:1] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; SI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[12:13], v[8:9], v[6:7] +; SI-SDAG-CG-NEXT: v_fma_f64 v[8:9], v[12:13], v[6:7], v[10:11] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1] -; SI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15 -; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7] +; SI-SDAG-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[0:1] +; SI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] +; SI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9] ; SI-SDAG-CG-NEXT: v_cndmask_b32_e64 v6, 0, v14, s[4:5] ; SI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6 ; SI-SDAG-CG-NEXT: v_rcp_f64_e32 v[6:7], v[2:3] @@ -5547,39 +5530,38 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) { ; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v6 ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6 -; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] ; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1] +; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; SI-GISEL-CG-NEXT: v_mov_b32_e32 v12, 0xffffff80 ; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5 ; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7] +; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc ; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5 +; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] ; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] ; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] ; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1] ; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7] -; SI-GISEL-CG-NEXT: v_lshlrev_b32_e32 v6, 8, v12 -; SI-GISEL-CG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6 -; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1] -; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[10:11], v[2:3] -; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5] -; SI-GISEL-CG-NEXT: v_mov_b32_e32 v12, 0xffffff80 -; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc -; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5 -; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11] +; SI-GISEL-CG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] +; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[4:5], v[4:5], v[0:1] +; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[4:5] +; SI-GISEL-CG-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5 +; SI-GISEL-CG-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7] ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13 -; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5 -; SI-GISEL-CG-NEXT: v_mov_b32_e32 v13, 0x260 -; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] -; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] -; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3] -; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13 -; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9] +; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5 ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3] +; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] +; SI-GISEL-CG-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9] +; SI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[10:11], v[8:9], v[6:7] +; SI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[2:3] +; SI-GISEL-CG-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3] +; SI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7] ; SI-GISEL-CG-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5] ; SI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6 -; SI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13 ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; SI-GISEL-CG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; SI-GISEL-CG-NEXT: v_rcp_f64_e32 v[4:5], v[0:1] @@ -5631,12 +5613,11 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) { ; VI-SDAG-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9] ; VI-SDAG-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; VI-SDAG-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11] -; VI-SDAG-CG-NEXT: v_mov_b32_e32 v9, 0x260 -; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc +; VI-SDAG-CG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc ; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5] -; VI-SDAG-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 -; VI-SDAG-CG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9 -; VI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10 +; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] +; VI-SDAG-CG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3] +; VI-SDAG-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9 ; VI-SDAG-CG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8 ; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] ; VI-SDAG-CG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] @@ -5692,12 +5673,11 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) { ; VI-GISEL-CG-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5] ; VI-GISEL-CG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; VI-GISEL-CG-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7] -; VI-GISEL-CG-NEXT: v_mov_b32_e32 v9, 0x260 -; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc +; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc ; VI-GISEL-CG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5] -; VI-GISEL-CG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9 -; VI-GISEL-CG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9 -; VI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10 +; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] +; VI-GISEL-CG-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[2:3] +; VI-GISEL-CG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v9 ; VI-GISEL-CG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8 ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; VI-GISEL-CG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc