diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index dc860700db91..7911ef8c9aa0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -8673,6 +8673,36 @@ static Instruction *foldFCmpFSubIntoFCmp(FCmpInst &I, Instruction *LHSI,
         I.setHasNoNaNs(true);
       return &I;
     }
+    // fcmp `pred (C - Y), C` -> `fcmp swap(pred) Y, 0`
+    // where C and Y can't be arbitrary floating-point values.
+    // For example, with `C = 1.0f` and `Y = 0x1p-149`, `1.0f - Y` rounds back
+    // to `1.0f`, so the source compare is false while the rewritten compare is
+    // true.
+    // We need to make sure (C - Y) never rounds back to C.
+    const APFloat *C;
+    Value *IntSrc;
+    if (match(RHSC, m_APFloat(C)) &&
+        match(LHSI, m_FSub(m_Specific(RHSC), m_IToFP(m_Value(IntSrc)))) &&
+        C->isNormal()) {
+      // Requirements on C and Y:
+      // 1. C is finite, nonzero, normal.
+      // 2. C shouldn't be too large, that is, ULP(C) <= 1.
+      // 3. Y must be of the form `[su]itofp`, so a finite nonzero result of Y
+      //    must be integer-valued with an absolute value of at least 1;
+      //    as long as the step size near C does not exceed 1,
+      //    C - Y cannot be rounded back to C when Y != 0.
+      // 4. If Y = 0, `fcmp pred (C - 0), C` is equivalent to `fcmp swap(pred)
+      //    0, 0` for ordered and unordered predicates as long as C is finite
+      //    and nonzero.
+      int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+      if (MantissaWidth != -1 && ilogb(*C) < MantissaWidth) {
+        Constant *ZeroC = ConstantFP::getZero(LHSI->getType());
+        I.setPredicate(I.getSwappedPredicate());
+        CI.replaceOperand(I, 0, Y);
+        CI.replaceOperand(I, 1, ZeroC);
+        return &I;
+      }
+    }
     break;
   }
 
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index f3e242fb1b1e..e3c43812cedd 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -1812,6 +1812,147 @@ define i1 @fcmp_oeq_fsub_const(float %x, float %y) {
   ret i1 %cmp
 }
 
+define i1 @pr185561(i32 %arg0) {
+; CHECK-LABEL: @pr185561(
+; CHECK-NEXT:    [[V0:%.*]] = add i32 [[ARG0:%.*]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[V0]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %v0 = add i32 %arg0, -1
+  %v1 = sitofp i32 %v0 to float
+  %v2 = fsub float 1.000000e+00, %v1
+  %v3 = fcmp olt float %v2, 1.000000e+00
+  ret i1 %v3
+}
+
+define i1 @same_const_sub_sitofp_eq(i32 %x) {
+; CHECK-LABEL: @same_const_sub_sitofp_eq(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to float
+  %s = fsub float 1.000000e+00, %f
+  %cmp = fcmp oeq float %s, 1.000000e+00
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_uitofp_olt(i32 %x) {
+; CHECK-LABEL: @same_const_sub_uitofp_olt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = uitofp i32 %x to float
+  %s = fsub float 2.000000e+00, %f
+  %cmp = fcmp olt float %s, 2.000000e+00
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_no_fold_large_c(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_large_c(
+; CHECK-NOT: icmp
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[S:%.*]] = fsub float 0x417FFFFFE0000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[S]], 0x417FFFFFE0000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to float
+  %s = fsub float 3.355443e+07, %f
+  %cmp = fcmp oeq float %s, 3.355443e+07
+  ret i1 %cmp
+}
+
+; NOTE(review): splat operands reconstructed (stripped in extraction); confirm values.
+define <2 x i1> @same_const_sub_sitofp_vec_eq(<2 x i32> %x) {
+; CHECK-LABEL: @same_const_sub_sitofp_vec_eq(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %f = sitofp <2 x i32> %x to <2 x float>
+  %s = fsub <2 x float> <float 1.000000e+00, float 1.000000e+00>, %f
+  %cmp = fcmp oeq <2 x float> %s, <float 1.000000e+00, float 1.000000e+00>
+
+  ret <2 x i1> %cmp
+}
+
+; NOTE(review): splat operands reconstructed (stripped in extraction); confirm values.
+define <2 x i1> @same_const_sub_uitofp_vec_olt(<2 x i32> %x) {
+; CHECK-LABEL: @same_const_sub_uitofp_vec_olt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %f = uitofp <2 x i32> %x to <2 x float>
+  %s = fsub <2 x float> <float 2.000000e+00, float 2.000000e+00>, %f
+  %cmp = fcmp olt <2 x float> %s, <float 2.000000e+00, float 2.000000e+00>
+
+  ret <2 x i1> %cmp
+}
+
+define i1 @same_const_sub_no_fold_subnormal_c(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_subnormal_c(
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[S:%.*]] = fsub float 0x36A0000000000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float [[S]], 0x36A0000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to float
+  %s = fsub float 0x36A0000000000000, %f
+  %cmp = fcmp olt float %s, 0x36A0000000000000
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_no_fold_wrong_mantissa_width(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_wrong_mantissa_width(
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[S:%.*]] = fsub float 0x4180000000000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[S]], 0x4180000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to float
+  %s = fsub float 3.3554432e+07, %f
+  %cmp = fcmp oeq float %s, 3.3554432e+07
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_sitofp_x86_fp80_eq(i32 %x) {
+; CHECK-LABEL: @same_const_sub_sitofp_x86_fp80_eq(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to x86_fp80
+  %s = fsub x86_fp80 0xK3FFF8000000000000000, %f
+  %cmp = fcmp oeq x86_fp80 %s, 0xK3FFF8000000000000000
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_no_fold_x86_fp80_large_c(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_x86_fp80_large_c(
+; CHECK-NOT: icmp
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to x86_fp80
+; CHECK-NEXT:    [[S:%.*]] = fsub x86_fp80 0xK403F8000000000000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq x86_fp80 [[S]], 0xK403F8000000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to x86_fp80
+  ; 2^64, so ilogb(C) == 64, which should fail `ilogb(C) < MantissaWidth`
+  %s = fsub x86_fp80 0xK403F8000000000000000, %f
+  %cmp = fcmp oeq x86_fp80 %s, 0xK403F8000000000000000
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_no_fold_ppcfp128(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_ppcfp128(
+; CHECK-NOT: icmp
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to ppc_fp128
+; CHECK-NEXT:    [[S:%.*]] = fsub ppc_fp128 0xM3FF00000000000000000000000000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq ppc_fp128 [[S]], 0xM3FF00000000000000000000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to ppc_fp128
+  %s = fsub ppc_fp128 0xM3FF00000000000000000000000000000, %f
+  %cmp = fcmp oeq ppc_fp128 %s, 0xM3FF00000000000000000000000000000
+  ret i1 %cmp
+}
+
 define i1 @fcmp_oge_fsub_const(float %x, float %y) {
 ; CHECK-LABEL: @fcmp_oge_fsub_const(
 ; CHECK-NEXT:    [[FS:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]