Cullen Rhodes 5d089d9a83 [DAGCombiner] Fix invalid size request in combineRepeatedFPDivisors
If we have a vector FP division with a splatted divisor, use
getVectorMinNumElements when scaling the number of uses by the splat factor.

For AArch64 the combine kicks in for the <vscale x 4 x float> case since it's
above the fdiv threshold (3) when scaling num uses by splat factor, but the
codegen is worse (splat + vector fdiv + vector fmul) than the <vscale x 2 x
double> case (splat + vector fdiv).

If the combine could be converted into a scalar FP division by
scalarizeBinOpOfSplats it might be cheaper, but that conversion appears to be
predicated on the isExtractVecEltCheap TLI hook, which is implemented for x86
but not for AArch64. Perhaps for now combineRepeatedFPDivisors should only
scale the number of uses by the splat factor if the division can be converted
into a scalar op.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D118343
2022-01-28 17:01:08 +00:00

221 lines
8.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
; Following test cases check:
; a / D; b / D; c / D;
; =>
; recip = 1.0 / D; a * recip; b * recip; c * recip;
; Scalar float case: %a, %b and %c are each divided by the same %D.
; The expected assembly contains a single fdiv computing 1.0 / D and three
; fmuls by that reciprocal, followed by a tail call to foo_3f.
define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
; CHECK-LABEL: three_fdiv_float:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s4, #1.00000000
; CHECK-NEXT: fdiv s4, s4, s0
; CHECK-NEXT: fmul s0, s1, s4
; CHECK-NEXT: fmul s1, s2, s4
; CHECK-NEXT: fmul s2, s3, s4
; CHECK-NEXT: b foo_3f
%div = fdiv float %a, %D
%div1 = fdiv float %b, %D
%div2 = fdiv float %c, %D
tail call void @foo_3f(float %div, float %div1, float %div2)
ret void
}
; Scalar double case: three divisions share the divisor %D.
; The expected assembly contains one fdiv (1.0 / D) and three fmuls by the
; reciprocal, followed by a tail call to foo_3d.
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-LABEL: three_fdiv_double:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d4, #1.00000000
; CHECK-NEXT: fdiv d4, d4, d0
; CHECK-NEXT: fmul d0, d1, d4
; CHECK-NEXT: fmul d1, d2, d4
; CHECK-NEXT: fmul d2, d3, d4
; CHECK-NEXT: b foo_3d
%div = fdiv double %a, %D
%div1 = fdiv double %b, %D
%div2 = fdiv double %c, %D
tail call void @foo_3d(double %div, double %div1, double %div2)
ret void
}
; Fixed-width <4 x float> case: three vector divisions share the vector
; divisor %D. The expected assembly contains one vector fdiv of a 1.0 splat
; by D and three vector fmuls by that reciprocal.
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: three_fdiv_4xfloat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v4.4s, #1.00000000
; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT: b foo_3_4xf
%div = fdiv <4 x float> %a, %D
%div1 = fdiv <4 x float> %b, %D
%div2 = fdiv <4 x float> %c, %D
tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
ret void
}
; Fixed-width <2 x double> case: three vector divisions share the vector
; divisor %D. The expected assembly contains one vector fdiv of a 1.0 splat
; by D and three vector fmuls by that reciprocal.
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; CHECK-LABEL: three_fdiv_2xdouble:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v4.2d, #1.00000000
; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d
; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d
; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d
; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d
; CHECK-NEXT: b foo_3_2xd
%div = fdiv <2 x double> %a, %D
%div1 = fdiv <2 x double> %b, %D
%div2 = fdiv <2 x double> %c, %D
tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
ret void
}
; Following test cases check we never combine two FDIVs if neither of them
; calculates a reciprocal.
; Scalar float case with only two divisions by %D. The expected assembly
; keeps both fdiv instructions and contains no reciprocal or fmul.
define void @two_fdiv_float(float %D, float %a, float %b) #0 {
; CHECK-LABEL: two_fdiv_float:
; CHECK: // %bb.0:
; CHECK-NEXT: fdiv s3, s1, s0
; CHECK-NEXT: fdiv s1, s2, s0
; CHECK-NEXT: fmov s0, s3
; CHECK-NEXT: b foo_2f
%div = fdiv float %a, %D
%div1 = fdiv float %b, %D
tail call void @foo_2f(float %div, float %div1)
ret void
}
; Scalar double case with only two divisions by %D. The expected assembly
; keeps both fdiv instructions and contains no reciprocal or fmul.
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; CHECK-LABEL: two_fdiv_double:
; CHECK: // %bb.0:
; CHECK-NEXT: fdiv d3, d1, d0
; CHECK-NEXT: fdiv d1, d2, d0
; CHECK-NEXT: fmov d0, d3
; CHECK-NEXT: b foo_2d
%div = fdiv double %a, %D
%div1 = fdiv double %b, %D
tail call void @foo_2d(double %div, double %div1)
ret void
}
; Fixed-width splat case: the scalar %D is broadcast to <4 x float>
; (insertelement into lane 0 + shufflevector with an all-zero mask) and used
; as the divisor of three vector fdivs. The expected assembly materialises
; the splat with dup, performs one vector fdiv of 1.0 by the splat, and
; replaces the three divisions with vector fmuls by that reciprocal.
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: splat_three_fdiv_4xfloat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v4.4s, #1.00000000
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT: b foo_3_4xf
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
%div = fdiv <4 x float> %a, %splat
%div1 = fdiv <4 x float> %b, %splat
%div2 = fdiv <4 x float> %c, %splat
tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
ret void
}
; Fixed-width splat case with a single division: %a is divided once by a
; <4 x float> splat of the scalar %D. The expected assembly still forms a
; reciprocal (fmov 1.0, fdiv) and multiplies %a by it.
; NOTE(review): presumably the single use is scaled by the four splat lanes
; when compared against the repeated-divisor threshold; confirm against
; combineRepeatedFPDivisors.
define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov v2.4s, #1.00000000
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: fdiv v0.4s, v2.4s, v0.4s
; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
entry:
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
%div = fdiv <4 x float> %a, %splat
ret <4 x float> %div
}
; Scalable-vector splat case with a single division: %a is divided once by a
; <vscale x 4 x float> splat of the scalar %D (attribute group #1 enables
; SVE). The expected assembly forms the reciprocal with a predicated fdivr
; of a 1.0 splat and then multiplies %a by it.
; NOTE(review): presumably the combine applies here because the single use
; is scaled by the minimum element count (4) of the scalable type; confirm
; against combineRepeatedFPDivisors.
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: fmov z2.s, #1.00000000
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: fmul z0.s, z1.s, z0.s
; CHECK-NEXT: ret
entry:
%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
%div = fdiv <vscale x 4 x float> %a, %splat
ret <vscale x 4 x float> %div
}
; Scalable-vector splat case with three divisions: %a, %b and %c are each
; divided by the same <vscale x 4 x float> splat of the scalar %D. The
; expected assembly contains one predicated fdiv of a 1.0 splat by the
; divisor and three fmuls by that reciprocal, then a tail call to
; foo_3_nxv4f32.
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: fmov z4.s, #1.00000000
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: fdiv z4.s, p0/m, z4.s, z0.s
; CHECK-NEXT: fmul z0.s, z1.s, z4.s
; CHECK-NEXT: fmul z1.s, z2.s, z4.s
; CHECK-NEXT: fmul z2.s, z3.s, z4.s
; CHECK-NEXT: b foo_3_nxv4f32
entry:
%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
%div = fdiv <vscale x 4 x float> %a, %splat
%div1 = fdiv <vscale x 4 x float> %b, %splat
%div2 = fdiv <vscale x 4 x float> %c, %splat
tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
ret void
}
; Scalable-vector splat case with a single division: %a is divided once by a
; <vscale x 2 x double> splat of the scalar %D. The expected assembly is a
; plain splat followed by one predicated fdivr of %a by the splat; no
; reciprocal and no fmul are generated.
; NOTE(review): presumably one use scaled by the minimum element count (2)
; stays below the repeated-divisor threshold; confirm against
; combineRepeatedFPDivisors.
define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
%splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
%div = fdiv <vscale x 2 x double> %a, %splat
ret <vscale x 2 x double> %div
}
; Scalable-vector splat case with two divisions: %a and %b are each divided
; by the same <vscale x 2 x double> splat of the scalar %D. The expected
; assembly contains one predicated fdiv of a 1.0 splat by the divisor and
; two fmuls by that reciprocal, then a tail call to foo_2_nxv2f64.
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fmov z3.d, #1.00000000
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: fdiv z3.d, p0/m, z3.d, z0.d
; CHECK-NEXT: fmul z0.d, z1.d, z3.d
; CHECK-NEXT: fmul z1.d, z2.d, z3.d
; CHECK-NEXT: b foo_2_nxv2f64
entry:
%D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
%splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
%div = fdiv <vscale x 2 x double> %a, %splat
%div1 = fdiv <vscale x 2 x double> %b, %splat
tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
ret void
}
; External functions that the void-returning tests above tail-call with
; their fdiv results.
declare void @foo_3f(float, float, float)
declare void @foo_3d(double, double, double)
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
declare void @foo_2f(float, float)
declare void @foo_2d(double, double)
declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
; Attribute group #0: unsafe-fp-math only.
attributes #0 = { "unsafe-fp-math"="true" }
; Attribute group #1: unsafe-fp-math plus the SVE target feature; used by the
; scalable-vector tests and by splat_fdiv_v4f32.
attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }