[InstCombine] Fold fcmp (C - [su]itofp X), C to integer compares (#185826)

Recognize `fcmp pred (C - [su]itofp X), C` in InstCombine and fold it to `fcmp swap(pred) [su]itofp X, 0` for certain constants `C` (chosen so that `C - Y`, where `Y = [su]itofp X`, never rounds back to `C`). The new pattern can then be folded further by `foldFCmpIntToFPConst` into an integer compare. Fixes #185561. Proofs: alive2 ( https://alive2.llvm.org/ce/z/9dWsCb ); alive2 with constant constraints, which needs a local alive2 build ( https://alive2.llvm.org/ce/z/wDs9Tj ). I tried generalizing the pattern to any `fcmp pred (C - Y), C`, but alive2 rejects it ( https://alive2.llvm.org/ce/z/qMLGah ), so I will try to find more constraints on C and Y that make this rewrite hold in future PRs.
2026-03-27 03:18:15 +08:00 · 2026-03-27 03:18:15 +08:00 · 0383cd0f77
commit 0383cd0f77
parent 423f410e7e
2 changed files with 169 additions and 0 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@ -8673,6 +8673,36 @@ static Instruction *foldFCmpFSubIntoFCmp(FCmpInst &I, Instruction *LHSI,
        I.setHasNoNaNs(true);
      return &I;
    }
+    // `fcmp pred (C - Y), C` -> `fcmp swap(pred) Y, 0`
+    // This does not hold for arbitrary floating-point values of C and Y.
+    // For example, with `C = 1.0f` and `Y = 0x1p-149`, `1.0f - Y` rounds back
+    // to `1.0f`, so an `olt` source compare is false while the rewritten `ogt`
+    // compare is true.
+    // We need to make sure (C - Y) never rounds back to C when Y != 0.
+    const APFloat *C;
+    Value *IntSrc;
+    if (match(RHSC, m_APFloat(C)) &&
+        match(LHSI, m_FSub(m_Specific(RHSC), m_IToFP(m_Value(IntSrc)))) &&
+        C->isNormal()) {
+      // Requirements on C and Y:
+      // 1. C is finite, nonzero, normal.
+      // 2. C is not too large: ULP(C) <= 1, i.e. ilogb(C) < MantissaWidth.
+      // 3. Y must be of the form `[su]itofp X`, so any finite nonzero value of
+      // Y is integer-valued with an absolute value of at least 1;
+      // as long as the step size near C does not exceed 1,
+      // C - Y cannot be rounded back to C when Y != 0.
+      // 4. If Y = 0, `fcmp pred (C - 0), C` is equivalent to `fcmp swap(pred)
+      // 0, 0` for ordered and unordered predicates as long as C is finite and
+      // nonzero.
+      int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+      if (MantissaWidth != -1 && ilogb(*C) < MantissaWidth) {
+        Constant *ZeroC = ConstantFP::getZero(LHSI->getType());
+        I.setPredicate(I.getSwappedPredicate());
+        CI.replaceOperand(I, 0, Y);
+        CI.replaceOperand(I, 1, ZeroC);
+        return &I;
+      }
+    }
    break;
  }

--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@ -1812,6 +1812,145 @@ define i1 @fcmp_oeq_fsub_const(float %x, float %y) {
  ret i1 %cmp
 }

+define i1 @pr185561(i32 %arg0) {
+; CHECK-LABEL: @pr185561(
+; CHECK-NEXT:    [[V0:%.*]] = add i32 [[ARG0:%.*]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[V0]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %v0 = add i32 %arg0, -1
+  %v1 = sitofp i32 %v0 to float
+  %v2 = fsub float 1.000000e+00, %v1
+  %v3 = fcmp olt float %v2, 1.000000e+00
+  ret i1 %v3
+}
+
+define i1 @same_const_sub_sitofp_eq(i32 %x) {
+; CHECK-LABEL: @same_const_sub_sitofp_eq(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to float
+  %s = fsub float 1.000000e+00, %f
+  %cmp = fcmp oeq float %s, 1.000000e+00
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_uitofp_olt(i32 %x) {
+; CHECK-LABEL: @same_const_sub_uitofp_olt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = uitofp i32 %x to float
+  %s = fsub float 2.000000e+00, %f
+  %cmp = fcmp olt float %s, 2.000000e+00
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_no_fold_large_c(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_large_c(
+; CHECK-NOT:    icmp
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[S:%.*]] = fsub float 0x417FFFFFE0000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[S]], 0x417FFFFFE0000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to float
+  %s = fsub float 3.355443e+07, %f
+  %cmp = fcmp oeq float %s, 3.355443e+07
+  ret i1 %cmp
+}
+
+define <2 x i1> @same_const_sub_sitofp_vec_eq(<2 x i32> %x) {
+; CHECK-LABEL: @same_const_sub_sitofp_vec_eq(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %f = sitofp <2 x i32> %x to <2 x float>
+  %s = fsub <2 x float> <float 1.000000e+00, float 1.000000e+00>, %f
+  %cmp = fcmp oeq <2 x float> %s,
+                   <float 1.000000e+00, float 1.000000e+00>
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @same_const_sub_uitofp_vec_olt(<2 x i32> %x) {
+; CHECK-LABEL: @same_const_sub_uitofp_vec_olt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %f = uitofp <2 x i32> %x to <2 x float>
+  %s = fsub <2 x float> <float 2.000000e+00, float 2.000000e+00>, %f
+  %cmp = fcmp olt <2 x float> %s,
+                   <float 2.000000e+00, float 2.000000e+00>
+  ret <2 x i1> %cmp
+}
+
+define i1 @same_const_sub_no_fold_subnormal_c(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_subnormal_c(
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[S:%.*]] = fsub float 0x36A0000000000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float [[S]], 0x36A0000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to float
+  %s = fsub float 0x36A0000000000000, %f
+  %cmp = fcmp olt float %s, 0x36A0000000000000
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_no_fold_wrong_mantissa_width(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_wrong_mantissa_width(
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[S:%.*]] = fsub float 0x4180000000000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[S]], 0x4180000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to float
+  %s = fsub float 3.3554432e+07, %f
+  %cmp = fcmp oeq float %s, 3.3554432e+07
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_sitofp_x86_fp80_eq(i32 %x) {
+; CHECK-LABEL: @same_const_sub_sitofp_x86_fp80_eq(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to x86_fp80
+  %s = fsub x86_fp80 0xK3FFF8000000000000000, %f
+  %cmp = fcmp oeq x86_fp80 %s, 0xK3FFF8000000000000000
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_no_fold_x86_fp80_large_c(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_x86_fp80_large_c(
+; CHECK-NOT:    icmp
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to x86_fp80
+; CHECK-NEXT:    [[S:%.*]] = fsub x86_fp80 0xK403F8000000000000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq x86_fp80 [[S]], 0xK403F8000000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to x86_fp80
+  ; 2^64, so ilogb(C) == 64, which should fail `ilogb(C) < MantissaWidth`
+  %s = fsub x86_fp80 0xK403F8000000000000000, %f
+  %cmp = fcmp oeq x86_fp80 %s, 0xK403F8000000000000000
+  ret i1 %cmp
+}
+
+define i1 @same_const_sub_no_fold_ppcfp128(i32 %x) {
+; CHECK-LABEL: @same_const_sub_no_fold_ppcfp128(
+; CHECK-NOT:    icmp
+; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[X:%.*]] to ppc_fp128
+; CHECK-NEXT:    [[S:%.*]] = fsub ppc_fp128 0xM3FF00000000000000000000000000000, [[F]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq ppc_fp128 [[S]], 0xM3FF00000000000000000000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %f = sitofp i32 %x to ppc_fp128
+  %s = fsub ppc_fp128 0xM3FF00000000000000000000000000000, %f
+  %cmp = fcmp oeq ppc_fp128 %s, 0xM3FF00000000000000000000000000000
+  ret i1 %cmp
+}
+
 define i1 @fcmp_oge_fsub_const(float %x, float %y) {
 ; CHECK-LABEL: @fcmp_oge_fsub_const(
 ; CHECK-NEXT:    [[FS:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]