[ARM] Disable strict node mutation and use correct lowering for several strict ops (#170136)
Changes in this PR were discussed and reviewed in https://github.com/llvm/llvm-project/pull/137101.
This commit is contained in:
parent
e7748e92cd
commit
d08b0f7240
@ -546,16 +546,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
|
||||
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
|
||||
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
|
||||
setOperationAction(Op, MVT::f64, Legal);
|
||||
|
||||
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
|
||||
}
|
||||
}
|
||||
|
||||
if (Subtarget->hasFullFP16()) {
|
||||
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
|
||||
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
|
||||
setOperationAction(Op, MVT::f16, Legal);
|
||||
|
||||
addRegisterClass(MVT::f16, &ARM::HPRRegClass);
|
||||
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
|
||||
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
|
||||
|
||||
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
|
||||
setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Legal);
|
||||
setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Legal);
|
||||
}
|
||||
|
||||
if (Subtarget->hasBF16()) {
|
||||
@ -865,13 +873,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
|
||||
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
|
||||
|
||||
if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
|
||||
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
|
||||
@ -879,11 +888,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
|
||||
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
|
||||
}
|
||||
} else {
|
||||
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
|
||||
}
|
||||
|
||||
if (!Subtarget->hasFP16()) {
|
||||
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
|
||||
} else {
|
||||
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
|
||||
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
|
||||
}
|
||||
|
||||
computeRegisterProperties(Subtarget->getRegisterInfo());
|
||||
@ -1223,16 +1237,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
|
||||
if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
|
||||
}
|
||||
|
||||
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
|
||||
if (!Subtarget->hasFP16()) {
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
|
||||
}
|
||||
|
||||
// Strict floating-point comparisons need custom lowering.
|
||||
@ -1248,34 +1262,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
|
||||
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
|
||||
|
||||
// FP-ARMv8 implements a lot of rounding-like FP operations.
|
||||
if (Subtarget->hasFPARMv8Base()) {
|
||||
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FROUND, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FRINT, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
|
||||
if (Subtarget->hasFPARMv8Base()) {
|
||||
for (auto Op :
|
||||
{ISD::FFLOOR, ISD::FCEIL, ISD::FROUND,
|
||||
ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT,
|
||||
ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
|
||||
ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FROUND,
|
||||
ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
|
||||
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM}) {
|
||||
setOperationAction(Op, MVT::f32, Legal);
|
||||
|
||||
if (Subtarget->hasFP64())
|
||||
setOperationAction(Op, MVT::f64, Legal);
|
||||
}
|
||||
|
||||
if (Subtarget->hasNEON()) {
|
||||
setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
|
||||
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
|
||||
}
|
||||
|
||||
if (Subtarget->hasFP64()) {
|
||||
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FROUND, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FRINT, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
|
||||
}
|
||||
}
|
||||
|
||||
// FP16 often need to be promoted to call lib functions
|
||||
@ -1430,6 +1436,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
|
||||
Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
|
||||
|
||||
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
|
||||
|
||||
IsStrictFPEnabled = true;
|
||||
}
|
||||
|
||||
bool ARMTargetLowering::useSoftFloat() const {
|
||||
|
||||
@ -814,7 +814,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
||||
|
||||
def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))),
|
||||
(VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>;
|
||||
def : FP16Pat<(f16_to_fp GPR:$a),
|
||||
def : FP16Pat<(any_f16_to_fp GPR:$a),
|
||||
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
||||
|
||||
let hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPSCR_RM] in
|
||||
@ -826,7 +826,7 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda,
|
||||
|
||||
def : FP16Pat<(f16 (any_fpround SPR:$Sm)),
|
||||
(COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
|
||||
def : FP16Pat<(fp_to_f16 SPR:$a),
|
||||
def : FP16Pat<(any_fp_to_f16 SPR:$a),
|
||||
(i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
|
||||
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane),
|
||||
(v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
|
||||
@ -891,7 +891,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
|
||||
def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))),
|
||||
(VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>,
|
||||
Requires<[HasFPARMv8, HasDPVFP]>;
|
||||
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
|
||||
def : FP16Pat<(f64 (any_f16_to_fp GPR:$a)),
|
||||
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
|
||||
Requires<[HasFPARMv8, HasDPVFP]>;
|
||||
|
||||
@ -917,7 +917,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
|
||||
def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)),
|
||||
(COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
|
||||
Requires<[HasFPARMv8, HasDPVFP]>;
|
||||
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
|
||||
def : FP16Pat<(any_fp_to_f16 (f64 DPR:$a)),
|
||||
(i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
|
||||
Requires<[HasFPARMv8, HasDPVFP]>;
|
||||
|
||||
|
||||
1499
llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
Normal file
1499
llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -675,8 +675,8 @@ define half @frem_f16(half %x, half %y) #0 {
|
||||
; CHECK-LABEL: frem_f16:
|
||||
; CHECK: .save {r11, lr}
|
||||
; CHECK-NEXT: push {r11, lr}
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-NEXT: bl fmodf
|
||||
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
|
||||
; CHECK-NEXT: pop {r11, pc}
|
||||
@ -713,7 +713,7 @@ define i32 @fptosi_i32_f16(half %x) #0 {
|
||||
|
||||
define i32 @fptoui_i32_f16(half %x) #0 {
|
||||
; CHECK-LABEL: fptoui_i32_f16:
|
||||
; CHECK: vcvt.s32.f16 s0, s0
|
||||
; CHECK: vcvt.u32.f16 s0, s0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: bx lr
|
||||
%val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
|
||||
@ -925,8 +925,8 @@ define half @atan2_f16(half %x, half %y) #0 {
|
||||
; CHECK-LABEL: atan2_f16:
|
||||
; CHECK: .save {r11, lr}
|
||||
; CHECK-NEXT: push {r11, lr}
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-NEXT: bl atan2f
|
||||
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
|
||||
; CHECK-NEXT: pop {r11, pc}
|
||||
@ -974,8 +974,8 @@ define half @pow_f16(half %x, half %y) #0 {
|
||||
; CHECK-LABEL: pow_f16:
|
||||
; CHECK: .save {r11, lr}
|
||||
; CHECK-NEXT: push {r11, lr}
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
|
||||
; CHECK-NEXT: pop {r11, pc}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user