
This moves the combine of fdiv by constant to fmul out of an 'if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()' block, so that it triggers if the divide is exact. An extra check for Recip.isDenormal() is added as multiple places make reference to it being unsafe or slow on certain platforms.
150 lines
4.2 KiB
LLVM
150 lines
4.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc < %s -mtriple=arm64-eabi -mattr=fullfp16,sve | FileCheck %s
|
|
|
|
; Scalar float divided by 2.0 with no fast-math flags. The reciprocal 0.5 is
; exact, so the expected code multiplies by 0.5 instead of dividing.
define float @divf32_2(float %a) nounwind {
; CHECK-LABEL: divf32_2:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: ret
  %r = fdiv float %a, 2.0
  ret float %r
}

; Scalar float divided by 2.0 with the `arcp` fast-math flag. The expected
; code is the same fmul by 0.5 as in the unflagged case.
define float @divf32_2_arcp(float %a) nounwind {
; CHECK-LABEL: divf32_2_arcp:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: ret
  %r = fdiv arcp float %a, 2.0
  ret float %r
}

; Scalar float divided by 0.75 with no fast-math flags. 1/0.75 is not exactly
; representable, so the expected code keeps the fdiv.
define float @divf32_p75(float %a) nounwind {
; CHECK-LABEL: divf32_p75:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #0.75000000
; CHECK-NEXT: fdiv s0, s0, s1
; CHECK-NEXT: ret
  %r = fdiv float %a, 0.75
  ret float %r
}

; Scalar float divided by 0.75 with the `arcp` fast-math flag. The expected
; code multiplies by a rounded reciprocal: mov/movk build the bit pattern
; 0x3FAAAAAB (low half 0xaaab, high half 16298 = 0x3faa), i.e. ~1.3333333.
define float @divf32_p75_arcp(float %a) nounwind {
; CHECK-LABEL: divf32_p75_arcp:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691 // =0xaaab
; CHECK-NEXT: movk w8, #16298, lsl #16
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: ret
  %r = fdiv arcp float %a, 0.75
  ret float %r
}

; Half-precision divide by 2.0 with no fast-math flags. The expected code is
; an fmul by 0.5 on the h registers.
define half @divf16_2(half %a) nounwind {
; CHECK-LABEL: divf16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov h1, #0.50000000
; CHECK-NEXT: fmul h0, h0, h1
; CHECK-NEXT: ret
  %r = fdiv half %a, 2.0
  ret half %r
}

; Half-precision divide by 32768.0 (bit pattern 0x7800) with no fast-math
; flags. The expected code keeps the fdiv.
; NOTE(review): presumably the fold is skipped because the reciprocal 2^-15 is
; below the smallest normal half value (2^-14), i.e. a denormal - confirm
; against the combine's isDenormal() check.
define half @divf16_32768(half %a) nounwind {
; CHECK-LABEL: divf16_32768:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #30720 // =0x7800
; CHECK-NEXT: fmov h1, w8
; CHECK-NEXT: fdiv h0, h0, h1
; CHECK-NEXT: ret
  %r = fdiv half %a, 32768.0
  ret half %r
}

; Half-precision divide by 32768.0 (bit pattern 0x7800) with the `arcp`
; fast-math flag. The expected code still keeps the fdiv.
; NOTE(review): presumably because the reciprocal 2^-15 is a denormal in half
; precision - confirm that rejecting it even under `arcp` is intended.
define half @divf16_32768_arcp(half %a) nounwind {
; CHECK-LABEL: divf16_32768_arcp:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #30720 // =0x7800
; CHECK-NEXT: fmov h1, w8
; CHECK-NEXT: fdiv h0, h0, h1
; CHECK-NEXT: ret
  %r = fdiv arcp half %a, 32768.0
  ret half %r
}

; Double-precision divide by 2.0 with no fast-math flags. The expected code is
; an fmul by 0.5 on the d registers.
define double @divf64_2(double %a) nounwind {
; CHECK-LABEL: divf64_2:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d1, #0.50000000
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: ret
  %r = fdiv double %a, 2.0
  ret double %r
}

; Fixed-width <4 x float> divided by a splat of 2.0 with no fast-math flags.
; The expected code is a vector fmul by a splat of 0.5: movi #63, lsl #24
; yields 0x3F000000 per lane.
define <4 x float> @divv4f32_2(<4 x float> %a) nounwind {
; CHECK-LABEL: divv4f32_2:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #63, lsl #24
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %r = fdiv <4 x float> %a, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %r
}

; Fixed-width <4 x float> divided by a splat of 2.0 with the `arcp` fast-math
; flag. The expected code is the same vector fmul by a splat of 0.5
; (0x3F000000 per lane) as in the unflagged case.
define <4 x float> @divv4f32_2_arcp(<4 x float> %a) nounwind {
; CHECK-LABEL: divv4f32_2_arcp:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #63, lsl #24
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %r = fdiv arcp <4 x float> %a, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %r
}

; Fixed-width <4 x float> divided by a splat of 3.0 with no fast-math flags.
; 1/3 is not exactly representable, so the expected code keeps the vector
; fdiv.
define <4 x float> @divv4f32_3(<4 x float> %a) nounwind {
; CHECK-LABEL: divv4f32_3:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.4s, #3.00000000
; CHECK-NEXT: fdiv v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %r = fdiv <4 x float> %a, <float 3.0, float 3.0, float 3.0, float 3.0>
  ret <4 x float> %r
}

; Fixed-width <4 x float> divided by a splat of 3.0 with the `arcp` fast-math
; flag. The expected code multiplies by a rounded reciprocal: mov/movk build
; 0x3EAAAAAB (low half 0xaaab, high half 16042 = 0x3eaa), i.e. ~0.33333334,
; which is then duplicated into every lane.
define <4 x float> @divv4f32_3_arcp(<4 x float> %a) nounwind {
; CHECK-LABEL: divv4f32_3_arcp:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691 // =0xaaab
; CHECK-NEXT: movk w8, #16042, lsl #16
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %r = fdiv arcp <4 x float> %a, <float 3.0, float 3.0, float 3.0, float 3.0>
  ret <4 x float> %r
}

; Fixed-width <4 x float> divided by the non-splat constant <2, 4, 8, 16> with
; no fast-math flags. The expected code loads the divisor from the constant
; pool and keeps the vector fdiv.
define <4 x float> @divv4f32_24816(<4 x float> %a) nounwind {
; CHECK-LABEL: divv4f32_24816:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: fdiv v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %r = fdiv <4 x float> %a, <float 2.0, float 4.0, float 8.0, float 16.0>
  ret <4 x float> %r
}

; Scalable <vscale x 4 x float> divided by a splat of 2.0 with no fast-math
; flags. The expected code is an SVE fmul by the immediate #0.5, predicated on
; an all-true predicate (ptrue).
define <vscale x 4 x float> @divnxv4f32_2(<vscale x 4 x float> %a) nounwind {
; CHECK-LABEL: divnxv4f32_2:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5
; CHECK-NEXT: ret
  %r = fdiv <vscale x 4 x float> %a, splat (float 2.0)
  ret <vscale x 4 x float> %r
}