[ARM] Disable strict node mutation and use correct lowering for several strict ops (#170136)

Changes in this PR were discussed and reviewed in https://github.com/llvm/llvm-project/pull/137101.
2025-12-02 01:03:55 +03:00 · 2025-12-02 01:03:55 +03:00 · d08b0f7240
commit d08b0f7240
parent e7748e92cd
4 changed files with 1543 additions and 36 deletions
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@ -546,16 +546,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
      for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                      ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
        setOperationAction(Op, MVT::f64, Legal);
+
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    }
  }

  if (Subtarget->hasFullFP16()) {
+    for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+                    ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
+      setOperationAction(Op, MVT::f16, Legal);
+
    addRegisterClass(MVT::f16, &ARM::HPRRegClass);
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::f16, Custom);

    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+    setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Legal);
+    setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Legal);
  }

  if (Subtarget->hasBF16()) {
@ -865,13 +873,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
    setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
    setOperationAction(ISD::FP_ROUND,   MVT::f32, Custom);
-    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
-    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
    setOperationAction(ISD::STRICT_FP_ROUND,   MVT::f32, Custom);
  }

+  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+
  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
    setOperationAction(ISD::FP_EXTEND,  MVT::f64, Custom);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
@ -879,11 +888,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
      setOperationAction(ISD::FP_ROUND,  MVT::f16, Custom);
      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
    }
+  } else {
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
  }

  if (!Subtarget->hasFP16()) {
    setOperationAction(ISD::FP_EXTEND,  MVT::f32, Custom);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+  } else {
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
  }

  computeRegisterProperties(Subtarget->getRegisterInfo());
@ -1223,16 +1237,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
    if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
-      setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
-      setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
+      setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
+      setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
    }

    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
-      setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
-      setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
+      setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
+      setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
    }

    // Strict floating-point comparisons need custom lowering.
@ -1248,34 +1262,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

  // FP-ARMv8 implements a lot of rounding-like FP operations.
-  if (Subtarget->hasFPARMv8Base()) {
-    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
-    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
-    setOperationAction(ISD::FROUND, MVT::f32, Legal);
-    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-    setOperationAction(ISD::FRINT, MVT::f32, Legal);
-    setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+  if (Subtarget->hasFPARMv8Base()) {    
+    for (auto Op :
+         {ISD::FFLOOR,            ISD::FCEIL,             ISD::FROUND,
+          ISD::FTRUNC,            ISD::FNEARBYINT,        ISD::FRINT,
+          ISD::FROUNDEVEN,        ISD::FMINNUM,           ISD::FMAXNUM,
+          ISD::STRICT_FFLOOR,     ISD::STRICT_FCEIL,      ISD::STRICT_FROUND,
+          ISD::STRICT_FTRUNC,     ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
+          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM,    ISD::STRICT_FMAXNUM}) {
+      setOperationAction(Op, MVT::f32, Legal);
+
+      if (Subtarget->hasFP64())
+        setOperationAction(Op, MVT::f64, Legal);
+    }
+
    if (Subtarget->hasNEON()) {
      setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
      setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
      setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
      setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    }
-
-    if (Subtarget->hasFP64()) {
-      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
-      setOperationAction(ISD::FCEIL, MVT::f64, Legal);
-      setOperationAction(ISD::FROUND, MVT::f64, Legal);
-      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
-      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
-      setOperationAction(ISD::FRINT, MVT::f64, Legal);
-      setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
-      setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
-      setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
-    }
  }

  // FP16 often need to be promoted to call lib functions
@ -1430,6 +1436,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
      Align(1ULL << Subtarget->getPreferBranchLogAlignment()));

  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
+
+  IsStrictFPEnabled = true;
 }

 bool ARMTargetLowering::useSoftFloat() const {
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@ -814,7 +814,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),

 def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))),
              (VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>;
-def : FP16Pat<(f16_to_fp GPR:$a),
+def : FP16Pat<(any_f16_to_fp GPR:$a),
              (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;

 let hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPSCR_RM] in
@ -826,7 +826,7 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda,

 def : FP16Pat<(f16 (any_fpround SPR:$Sm)),
              (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
-def : FP16Pat<(fp_to_f16 SPR:$a),
+def : FP16Pat<(any_fp_to_f16 SPR:$a),
              (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
 def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane),
              (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
@ -891,7 +891,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
 def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))),
                  (VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>,
                  Requires<[HasFPARMv8, HasDPVFP]>;
-def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
+def : FP16Pat<(f64 (any_f16_to_fp GPR:$a)),
              (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
              Requires<[HasFPARMv8, HasDPVFP]>;

@ -917,7 +917,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
 def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)),
                  (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
                  Requires<[HasFPARMv8, HasDPVFP]>;
-def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
+def : FP16Pat<(any_fp_to_f16 (f64 DPR:$a)),
              (i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
                   Requires<[HasFPARMv8, HasDPVFP]>;

--- a/llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@ -675,8 +675,8 @@ define half @frem_f16(half %x, half %y) #0 {
 ; CHECK-LABEL: frem_f16:
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s1, s1
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
 ; CHECK-NEXT:    pop {r11, pc}
@ -713,7 +713,7 @@ define i32 @fptosi_i32_f16(half %x) #0 {

 define i32 @fptoui_i32_f16(half %x) #0 {
 ; CHECK-LABEL: fptoui_i32_f16:
-; CHECK:         vcvt.s32.f16 s0, s0
+; CHECK:         vcvt.u32.f16 s0, s0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bx lr
  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
@ -925,8 +925,8 @@ define half @atan2_f16(half %x, half %y) #0 {
 ; CHECK-LABEL: atan2_f16:
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s1, s1
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
 ; CHECK-NEXT:    bl atan2f
 ; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
 ; CHECK-NEXT:    pop {r11, pc}
@ -974,8 +974,8 @@ define half @pow_f16(half %x, half %y) #0 {
 ; CHECK-LABEL: pow_f16:
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s1, s1
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
 ; CHECK-NEXT:    pop {r11, pc}