[CodeGen] More consistently expand float ops by default (#150597)

These float operations were expanded for scalar f32/f64/f128, but not
for f16 and, more problematically, not for vectors. A small subset of
them was separately set to expand for vectors.

Change these to always expand by default, and adjust targets to mark
these as legal where necessary instead.

This is a much safer default, and avoids unnecessary legalization
failures caused by a target failing to manually mark them as expand.

Fixes https://github.com/llvm/llvm-project/issues/110753.
Fixes https://github.com/llvm/llvm-project/issues/121390.

(cherry picked from commit fe0dbe0f2950d95071be7140c7b4680f17a7ac4e)
This commit is contained in:
Nikita Popov 2025-07-28 09:46:00 +02:00 committed by Tobias Hieta
parent 431af6d04c
commit 81a3436485
4 changed files with 145 additions and 21 deletions

View File

@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() {
ISD::SDIVFIX, ISD::SDIVFIXSAT,
ISD::UDIVFIX, ISD::UDIVFIXSAT,
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
ISD::IS_FPCLASS},
ISD::IS_FPCLASS, ISD::FCBRT,
ISD::FLOG, ISD::FLOG2,
ISD::FLOG10, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10,
ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FCEIL, ISD::FRINT,
ISD::FTRUNC, ISD::FROUNDEVEN,
ISD::FTAN, ISD::FACOS,
ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH,
ISD::FTANH, ISD::FATAN2},
VT, Expand);
// Overflow operations default to expand
@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector())
setOperationAction(
{ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
VT, Expand);
setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
ISD::ANY_EXTEND_VECTOR_INREG,
ISD::SIGN_EXTEND_VECTOR_INREG,
ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR,
ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND},
VT, Expand);
// Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() {
{MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
Expand);
// These library functions default to expand.
setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
ISD::FATAN2},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
// Insert custom handling default for llvm.canonicalize.*.
setOperationAction(ISD::FCANONICALIZE,
{MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand);

View File

@ -392,8 +392,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
MVT::f32, Legal);
ISD::FROUNDEVEN, ISD::FTRUNC},
{MVT::f16, MVT::f32}, Legal);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
@ -413,9 +414,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
if (Subtarget->has16BitInsts())
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
else {
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Legal);
} else {
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
}

View File

@ -370,6 +370,11 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FROUND, VT, Legal);
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@ -1507,6 +1512,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
setOperationAction(ISD::FCEIL, MVT::f16, Legal);
}
if (Subtarget->hasNEON()) {

View File

@ -0,0 +1,111 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
; Loads an <8 x double> from %p1, applies llvm.roundeven.v8f64 to it, and
; stores the result to %p2. The expected powerpc64le output below contains
; eight `bl roundeven` calls (one per vector element), with the scalar
; results recombined via xxmrgld before being stored.
define void @test(ptr %p1, ptr %p2) nounwind {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr 0
; CHECK-NEXT: stdu 1, -224(1)
; CHECK-NEXT: li 5, 48
; CHECK-NEXT: std 0, 240(1)
; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill
; CHECK-NEXT: li 27, 16
; CHECK-NEXT: std 28, 192(1) # 8-byte Folded Spill
; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill
; CHECK-NEXT: li 29, 32
; CHECK-NEXT: li 28, 48
; CHECK-NEXT: stxvd2x 56, 1, 5 # 16-byte Folded Spill
; CHECK-NEXT: li 5, 64
; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill
; CHECK-NEXT: mr 30, 4
; CHECK-NEXT: stxvd2x 57, 1, 5 # 16-byte Folded Spill
; CHECK-NEXT: li 5, 80
; CHECK-NEXT: stxvd2x 58, 1, 5 # 16-byte Folded Spill
; CHECK-NEXT: li 5, 96
; CHECK-NEXT: lxvd2x 58, 0, 3
; CHECK-NEXT: stxvd2x 59, 1, 5 # 16-byte Folded Spill
; CHECK-NEXT: li 5, 112
; CHECK-NEXT: lxvd2x 59, 3, 27
; CHECK-NEXT: stxvd2x 60, 1, 5 # 16-byte Folded Spill
; CHECK-NEXT: li 5, 128
; CHECK-NEXT: stxvd2x 61, 1, 5 # 16-byte Folded Spill
; CHECK-NEXT: li 5, 144
; CHECK-NEXT: stxvd2x 62, 1, 5 # 16-byte Folded Spill
; CHECK-NEXT: li 5, 160
; CHECK-NEXT: lxvd2x 62, 3, 28
; CHECK-NEXT: stxvd2x 63, 1, 5 # 16-byte Folded Spill
; CHECK-NEXT: lxvd2x 63, 3, 29
; CHECK-NEXT: xxswapd 57, 58
; CHECK-NEXT: xxswapd 1, 59
; CHECK-NEXT: xxswapd 60, 62
; CHECK-NEXT: xxswapd 61, 63
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 56, 1
; CHECK-NEXT: xxlor 1, 59, 59
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 0, 1
; CHECK-NEXT: xxlor 1, 60, 60
; CHECK-NEXT: xxmrgld 59, 0, 56
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 60, 1
; CHECK-NEXT: xxlor 1, 62, 62
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 0, 1
; CHECK-NEXT: xxlor 1, 61, 61
; CHECK-NEXT: xxmrgld 62, 0, 60
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 61, 1
; CHECK-NEXT: xxlor 1, 63, 63
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 0, 1
; CHECK-NEXT: xxlor 1, 57, 57
; CHECK-NEXT: xxmrgld 63, 0, 61
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 61, 1
; CHECK-NEXT: xxlor 1, 58, 58
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 160
; CHECK-NEXT: stxvd2x 63, 30, 29
; CHECK-NEXT: xxswapd 0, 1
; CHECK-NEXT: stxvd2x 62, 30, 28
; CHECK-NEXT: stxvd2x 59, 30, 27
; CHECK-NEXT: ld 29, 200(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 28, 192(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 27, 184(1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 144
; CHECK-NEXT: xxmrgld 0, 0, 61
; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 128
; CHECK-NEXT: stxvd2x 0, 0, 30
; CHECK-NEXT: ld 30, 208(1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 112
; CHECK-NEXT: lxvd2x 60, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 96
; CHECK-NEXT: lxvd2x 59, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 80
; CHECK-NEXT: lxvd2x 58, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 64
; CHECK-NEXT: lxvd2x 57, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 48
; CHECK-NEXT: lxvd2x 56, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: addi 1, 1, 224
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
; CHECK-NEXT: blr
%v = load <8 x double>, ptr %p1, align 64
%res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %v)
store <8 x double> %res, ptr %p2, align 64
ret void
}
declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)