[ARM] Disable strict node mutation and use correct lowering for several strict ops (#170136)

Changes in this PR were discussed and reviewed in
https://github.com/llvm/llvm-project/pull/137101.
This commit is contained in:
Erik Enikeev 2025-12-02 01:03:55 +03:00 committed by GitHub
parent e7748e92cd
commit d08b0f7240
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 1543 additions and 36 deletions

View File

@ -546,16 +546,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
setOperationAction(Op, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
}
}
if (Subtarget->hasFullFP16()) {
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
setOperationAction(Op, MVT::f16, Legal);
addRegisterClass(MVT::f16, &ARM::HPRRegClass);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Legal);
}
if (Subtarget->hasBF16()) {
@ -865,13 +873,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
}
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
@ -879,11 +888,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
}
} else {
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
}
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
} else {
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
}
computeRegisterProperties(Subtarget->getRegisterInfo());
@ -1223,16 +1237,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
}
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
}
// Strict floating-point comparisons need custom lowering.
@ -1248,34 +1262,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// FP-ARMv8 implements a lot of rounding-like FP operations.
if (Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
if (Subtarget->hasFPARMv8Base()) {
for (auto Op :
{ISD::FFLOOR, ISD::FCEIL, ISD::FROUND,
ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT,
ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FROUND,
ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM}) {
setOperationAction(Op, MVT::f32, Legal);
if (Subtarget->hasFP64())
setOperationAction(Op, MVT::f64, Legal);
}
if (Subtarget->hasNEON()) {
setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
}
if (Subtarget->hasFP64()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
}
}
// FP16 operations often need to be promoted to call lib functions
@ -1430,6 +1436,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
IsStrictFPEnabled = true;
}
bool ARMTargetLowering::useSoftFloat() const {

View File

@ -814,7 +814,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>;
def : FP16Pat<(f16_to_fp GPR:$a),
def : FP16Pat<(any_f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
let hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPSCR_RM] in
@ -826,7 +826,7 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda,
def : FP16Pat<(f16 (any_fpround SPR:$Sm)),
(COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
def : FP16Pat<(fp_to_f16 SPR:$a),
def : FP16Pat<(any_fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane),
(v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
@ -891,7 +891,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
def : FP16Pat<(f64 (any_f16_to_fp GPR:$a)),
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
@ -917,7 +917,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)),
(COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
def : FP16Pat<(any_fp_to_f16 (f64 DPR:$a)),
(i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;

File diff suppressed because it is too large Load Diff

View File

@ -675,8 +675,8 @@ define half @frem_f16(half %x, half %y) #0 {
; CHECK-LABEL: frem_f16:
; CHECK: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: bl fmodf
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-NEXT: pop {r11, pc}
@ -713,7 +713,7 @@ define i32 @fptosi_i32_f16(half %x) #0 {
define i32 @fptoui_i32_f16(half %x) #0 {
; CHECK-LABEL: fptoui_i32_f16:
; CHECK: vcvt.s32.f16 s0, s0
; CHECK: vcvt.u32.f16 s0, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
%val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
@ -925,8 +925,8 @@ define half @atan2_f16(half %x, half %y) #0 {
; CHECK-LABEL: atan2_f16:
; CHECK: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: bl atan2f
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-NEXT: pop {r11, pc}
@ -974,8 +974,8 @@ define half @pow_f16(half %x, half %y) #0 {
; CHECK-LABEL: pow_f16:
; CHECK: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: bl powf
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-NEXT: pop {r11, pc}