[InstCombine] Fold fcmp (C - [su]itofp X), C to integer compares (#185826)

Recognize `fcmp pred (C - [su]itofp X), C` in InstCombine and fold it to
`fcmp swap(pred) [su]itofp X, 0` for certain constants `C` (chosen so that
`C - Y` never rounds back to `C`). The new pattern can then be folded
further by `foldFCmpIntToFPConst` into integer compares.

Fixes #185561
alive2: https://alive2.llvm.org/ce/z/9dWsCb
alive2 with constant constraints (needs local alive2 build):
https://alive2.llvm.org/ce/z/wDs9Tj

I tried generalizing the pattern to any `fcmp pred (C - Y), C`, but
alive2 rejects it: https://alive2.llvm.org/ce/z/qMLGah. I will try to
find more constraints on C and Y that make this rewrite hold in future
PRs.
This commit is contained in:
Kiva 2026-03-27 03:18:15 +08:00 committed by GitHub
parent 423f410e7e
commit 0383cd0f77
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 169 additions and 0 deletions

View File

@ -8673,6 +8673,36 @@ static Instruction *foldFCmpFSubIntoFCmp(FCmpInst &I, Instruction *LHSI,
I.setHasNoNaNs(true);
return &I;
}
// Fold `fcmp pred (C - Y), C` -> `fcmp swap(pred) Y, 0`.
// C and Y cannot be arbitrary floating-point values: the rewrite is only
// valid if (C - Y) never rounds back to C when Y != 0.
// For example, with `C = 1.0f` and `Y = 0x1p-149`, `1.0f - Y` rounds back
// to `1.0f`, so a source compare such as `olt` is false while the rewritten
// compare (`Y ogt 0`) is true.
const APFloat *C;
Value *IntSrc;
// IntSrc is bound only so the pattern can require that the subtrahend is an
// int-to-FP conversion; it is not read again in the code visible here.
if (match(RHSC, m_APFloat(C)) &&
match(LHSI, m_FSub(m_Specific(RHSC), m_IToFP(m_Value(IntSrc)))) &&
C->isNormal()) {
// Requirements on C and Y:
// 1. C is finite, nonzero and normal (the isNormal() check above).
// 2. C must not be too large, that is, ULP(C) <= 1.
// 3. Y must be of the form `[su]itofp X`, so any finite nonzero Y is
//    integer-valued with an absolute value of at least 1. As long as the
//    spacing of representable values near C does not exceed 1, C - Y
//    cannot round back to C when Y != 0.
// 4. If Y == 0, `fcmp pred (C - 0), C` is equivalent to
//    `fcmp swap(pred) 0, 0` for both ordered and unordered predicates, as
//    long as C is finite and nonzero.
// A width of -1 is treated as "unknown" and blocks the fold.
int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
// Enforces requirement 2: assuming MantissaWidth is the precision p of the
// type, ULP(C) = 2^(ilogb(C) - p + 1), so ULP(C) <= 1 iff ilogb(C) < p.
if (MantissaWidth != -1 && ilogb(*C) < MantissaWidth) {
Constant *ZeroC = ConstantFP::getZero(LHSI->getType());
// Rewrite the compare in place: swap the predicate because Y moves from
// the subtracted side of the LHS to being the LHS itself.
I.setPredicate(I.getSwappedPredicate());
// NOTE(review): Y is declared outside this hunk; presumably it is the
// fsub's second operand (the [su]itofp value) -- confirm.
CI.replaceOperand(I, 0, Y);
CI.replaceOperand(I, 1, ZeroC);
return &I;
}
}
break;
}

View File

@ -1812,6 +1812,145 @@ define i1 @fcmp_oeq_fsub_const(float %x, float %y) {
ret i1 %cmp
}
; Regression test for issue #185561:
; `fcmp olt (1.0 - sitofp(%arg0 - 1)), 1.0` is expected to become the integer
; compare `icmp sgt (%arg0 - 1), 0`, with no floating-point ops left.
define i1 @pr185561(i32 %arg0) {
; CHECK-LABEL: @pr185561(
; CHECK-NEXT: [[V0:%.*]] = add i32 [[ARG0:%.*]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[V0]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%v0 = add i32 %arg0, -1
%v1 = sitofp i32 %v0 to float
%v2 = fsub float 1.000000e+00, %v1
%v3 = fcmp olt float %v2, 1.000000e+00
ret i1 %v3
}
; Positive test: `fcmp oeq (1.0 - sitofp %x), 1.0` with a float constant
; C = 1.0 is expected to fold to `icmp eq %x, 0`.
define i1 @same_const_sub_sitofp_eq(i32 %x) {
; CHECK-LABEL: @same_const_sub_sitofp_eq(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%f = sitofp i32 %x to float
%s = fsub float 1.000000e+00, %f
%cmp = fcmp oeq float %s, 1.000000e+00
ret i1 %cmp
}
; Positive test with an unsigned conversion: `fcmp olt (2.0 - uitofp %x), 2.0`
; is expected to fold to `icmp ne %x, 0` (uitofp %x is never negative, so
; "greater than zero" is the same as "nonzero").
define i1 @same_const_sub_uitofp_olt(i32 %x) {
; CHECK-LABEL: @same_const_sub_uitofp_olt(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%f = uitofp i32 %x to float
%s = fsub float 2.000000e+00, %f
%cmp = fcmp olt float %s, 2.000000e+00
ret i1 %cmp
}
; Negative test: C = 33554430.0 (2^25 - 2, i.e. ilogb(C) == 24). Adjacent
; floats near C are 2 apart, so `C - Y` could round back to C; the
; sitofp/fsub/fcmp sequence is expected to stay unchanged.
define i1 @same_const_sub_no_fold_large_c(i32 %x) {
; CHECK-LABEL: @same_const_sub_no_fold_large_c(
; CHECK-NOT: icmp
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[X:%.*]] to float
; CHECK-NEXT: [[S:%.*]] = fsub float 0x417FFFFFE0000000, [[F]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[S]], 0x417FFFFFE0000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%f = sitofp i32 %x to float
%s = fsub float 3.355443e+07, %f
%cmp = fcmp oeq float %s, 3.355443e+07
ret i1 %cmp
}
; Vector variant of the sitofp/oeq case: with a splat constant of 1.0 the
; compare is expected to fold to `icmp eq <2 x i32> %x, zeroinitializer`.
define <2 x i1> @same_const_sub_sitofp_vec_eq(<2 x i32> %x) {
; CHECK-LABEL: @same_const_sub_sitofp_vec_eq(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%f = sitofp <2 x i32> %x to <2 x float>
%s = fsub <2 x float> <float 1.000000e+00, float 1.000000e+00>, %f
%cmp = fcmp oeq <2 x float> %s,
<float 1.000000e+00, float 1.000000e+00>
ret <2 x i1> %cmp
}
; Vector variant of the uitofp/olt case: with a splat constant of 2.0 the
; compare is expected to fold to `icmp ne <2 x i32> %x, zeroinitializer`.
define <2 x i1> @same_const_sub_uitofp_vec_olt(<2 x i32> %x) {
; CHECK-LABEL: @same_const_sub_uitofp_vec_olt(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%f = uitofp <2 x i32> %x to <2 x float>
%s = fsub <2 x float> <float 2.000000e+00, float 2.000000e+00>, %f
%cmp = fcmp olt <2 x float> %s,
<float 2.000000e+00, float 2.000000e+00>
ret <2 x i1> %cmp
}
; Negative test: C = 0x36A0000000000000 (2^-149) is a subnormal float, so the
; fold must not fire and the sitofp/fsub/fcmp sequence is expected to remain.
define i1 @same_const_sub_no_fold_subnormal_c(i32 %x) {
; CHECK-LABEL: @same_const_sub_no_fold_subnormal_c(
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[X:%.*]] to float
; CHECK-NEXT: [[S:%.*]] = fsub float 0x36A0000000000000, [[F]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[S]], 0x36A0000000000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%f = sitofp i32 %x to float
%s = fsub float 0x36A0000000000000, %f
%cmp = fcmp olt float %s, 0x36A0000000000000
ret i1 %cmp
}
; Negative test: C = 33554432.0 (2^25, i.e. ilogb(C) == 25) is too large for
; float, so the fold must not fire and the original sequence is expected to
; remain.
define i1 @same_const_sub_no_fold_wrong_mantissa_width(i32 %x) {
; CHECK-LABEL: @same_const_sub_no_fold_wrong_mantissa_width(
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[X:%.*]] to float
; CHECK-NEXT: [[S:%.*]] = fsub float 0x4180000000000000, [[F]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[S]], 0x4180000000000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%f = sitofp i32 %x to float
%s = fsub float 3.3554432e+07, %f
%cmp = fcmp oeq float %s, 3.3554432e+07
ret i1 %cmp
}
; Positive test for x86_fp80: with C = 1.0 (0xK3FFF8000000000000000) the
; oeq compare is expected to fold to `icmp eq %x, 0`.
define i1 @same_const_sub_sitofp_x86_fp80_eq(i32 %x) {
; CHECK-LABEL: @same_const_sub_sitofp_x86_fp80_eq(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%f = sitofp i32 %x to x86_fp80
%s = fsub x86_fp80 0xK3FFF8000000000000000, %f
%cmp = fcmp oeq x86_fp80 %s, 0xK3FFF8000000000000000
ret i1 %cmp
}
; Negative test for x86_fp80: C = 2^64 is too large for the fold, so the
; sitofp/fsub/fcmp sequence is expected to stay unchanged.
define i1 @same_const_sub_no_fold_x86_fp80_large_c(i32 %x) {
; CHECK-LABEL: @same_const_sub_no_fold_x86_fp80_large_c(
; CHECK-NOT: icmp
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[X:%.*]] to x86_fp80
; CHECK-NEXT: [[S:%.*]] = fsub x86_fp80 0xK403F8000000000000000, [[F]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq x86_fp80 [[S]], 0xK403F8000000000000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%f = sitofp i32 %x to x86_fp80
; 2^64, so ilogb(C) == 64, which should fail `ilogb(C) < MantissaWidth`
%s = fsub x86_fp80 0xK403F8000000000000000, %f
%cmp = fcmp oeq x86_fp80 %s, 0xK403F8000000000000000
ret i1 %cmp
}
; Negative test for ppc_fp128: even with C = 1.0 the compare is expected to
; stay unfolded. Presumably this exercises the `MantissaWidth != -1` guard
; (ppc_fp128 has no single fixed mantissa width) -- confirm against
; Type::getFPMantissaWidth().
define i1 @same_const_sub_no_fold_ppcfp128(i32 %x) {
; CHECK-LABEL: @same_const_sub_no_fold_ppcfp128(
; CHECK-NOT: icmp
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[X:%.*]] to ppc_fp128
; CHECK-NEXT: [[S:%.*]] = fsub ppc_fp128 0xM3FF00000000000000000000000000000, [[F]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq ppc_fp128 [[S]], 0xM3FF00000000000000000000000000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%f = sitofp i32 %x to ppc_fp128
%s = fsub ppc_fp128 0xM3FF00000000000000000000000000000, %f
%cmp = fcmp oeq ppc_fp128 %s, 0xM3FF00000000000000000000000000000
ret i1 %cmp
}
define i1 @fcmp_oge_fsub_const(float %x, float %y) {
; CHECK-LABEL: @fcmp_oge_fsub_const(
; CHECK-NEXT: [[FS:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]