
Re-landing #116970 after fixing a miscompilation error. The original change made it possible for CMPZ to have multiple uses; `ARMDAGToDAGISel::SelectCMPZ` was not prepared for this.

Pull Request: https://github.com/llvm/llvm-project/pull/118887

Original commit message:

Following #116547 and #116676, this PR changes the type of results and operands of some nodes to accept / return a normal type instead of Glue.

Unfortunately, changing the result type of one node requires changing the operand types of all potential consumer nodes, which in turn requires changing the result types of all other possible producer nodes. So this is a bulk change.
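For orientation, every test below exercises the same shape: a `fmul` feeding an `fadd` or `fsub` (sometimes with a broadcast scalar operand, and in the second half behind a predicated select), compiled under three llc configurations that either keep the multiply and add separate or fuse them into the `vfma`/`vfms`/`vfmas` family. A minimal sketch of the unpredicated pattern is below; the function name is illustrative, and the CHECK lines for any new test should be regenerated with `utils/update_llc_test_checks.py` rather than written by hand.

```llvm
; With -fp-contract=fast (the CHECK-MVE-VMLA configuration), the fmul+fadd
; pair below is expected to select to a single fused "vfma.f32 q0, q1, q2"
; rather than a vmul followed by a vadd.
define arm_aapcs_vfpcc <4 x float> @fma_sketch(<4 x float> %acc, <4 x float> %a, <4 x float> %b) {
entry:
  %mul = fmul <4 x float> %a, %b     ; multiplicands arrive in q1 and q2
  %add = fadd <4 x float> %acc, %mul ; accumulator arrives in q0
  ret <4 x float> %add
}
```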
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE-FP
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -fp-contract=fast -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE-VMLA
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE

define arm_aapcs_vfpcc <8 x half> @vfma16_v1(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
; CHECK-MVE-FP-LABEL: vfma16_v1:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2
; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma16_v1:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma16_v1:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s13, s0
; CHECK-MVE-NEXT: vmovx.f16 s12, s8
; CHECK-MVE-NEXT: vmovx.f16 s14, s4
; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8
; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s1
; CHECK-MVE-NEXT: vmovx.f16 s4, s9
; CHECK-MVE-NEXT: vmovx.f16 s8, s5
; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmla.f16 s1, s5, s9
; CHECK-MVE-NEXT: vins.f16 s1, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s2
; CHECK-MVE-NEXT: vmovx.f16 s4, s10
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmla.f16 s2, s6, s10
; CHECK-MVE-NEXT: vmovx.f16 s8, s3
; CHECK-MVE-NEXT: vmovx.f16 s4, s11
; CHECK-MVE-NEXT: vmovx.f16 s6, s7
; CHECK-MVE-NEXT: vmla.f16 s3, s7, s11
; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4
; CHECK-MVE-NEXT: vins.f16 s0, s13
; CHECK-MVE-NEXT: vins.f16 s2, s12
; CHECK-MVE-NEXT: vins.f16 s3, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <8 x half> %src2, %src3
  %1 = fadd <8 x half> %src1, %0
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <8 x half> @vfma16_v2(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
; CHECK-MVE-FP-LABEL: vfma16_v2:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2
; CHECK-MVE-FP-NEXT: vadd.f16 q0, q1, q0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma16_v2:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma16_v2:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s13, s0
; CHECK-MVE-NEXT: vmovx.f16 s12, s8
; CHECK-MVE-NEXT: vmovx.f16 s14, s4
; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8
; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s1
; CHECK-MVE-NEXT: vmovx.f16 s4, s9
; CHECK-MVE-NEXT: vmovx.f16 s8, s5
; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmla.f16 s1, s5, s9
; CHECK-MVE-NEXT: vins.f16 s1, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s2
; CHECK-MVE-NEXT: vmovx.f16 s4, s10
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmla.f16 s2, s6, s10
; CHECK-MVE-NEXT: vmovx.f16 s8, s3
; CHECK-MVE-NEXT: vmovx.f16 s4, s11
; CHECK-MVE-NEXT: vmovx.f16 s6, s7
; CHECK-MVE-NEXT: vmla.f16 s3, s7, s11
; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4
; CHECK-MVE-NEXT: vins.f16 s0, s13
; CHECK-MVE-NEXT: vins.f16 s2, s12
; CHECK-MVE-NEXT: vins.f16 s3, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <8 x half> %src2, %src3
  %1 = fadd <8 x half> %0, %src1
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <8 x half> @vfms16(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
; CHECK-MVE-FP-LABEL: vfms16:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2
; CHECK-MVE-FP-NEXT: vsub.f16 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfms16:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vfms.f16 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfms16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s13, s0
; CHECK-MVE-NEXT: vmovx.f16 s12, s8
; CHECK-MVE-NEXT: vmovx.f16 s14, s4
; CHECK-MVE-NEXT: vmls.f16 s0, s4, s8
; CHECK-MVE-NEXT: vmls.f16 s13, s14, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s1
; CHECK-MVE-NEXT: vmovx.f16 s4, s9
; CHECK-MVE-NEXT: vmovx.f16 s8, s5
; CHECK-MVE-NEXT: vmls.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmls.f16 s1, s5, s9
; CHECK-MVE-NEXT: vins.f16 s1, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s2
; CHECK-MVE-NEXT: vmovx.f16 s4, s10
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
; CHECK-MVE-NEXT: vmls.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmls.f16 s2, s6, s10
; CHECK-MVE-NEXT: vmovx.f16 s8, s3
; CHECK-MVE-NEXT: vmovx.f16 s4, s11
; CHECK-MVE-NEXT: vmovx.f16 s6, s7
; CHECK-MVE-NEXT: vmls.f16 s3, s7, s11
; CHECK-MVE-NEXT: vmls.f16 s8, s6, s4
; CHECK-MVE-NEXT: vins.f16 s0, s13
; CHECK-MVE-NEXT: vins.f16 s2, s12
; CHECK-MVE-NEXT: vins.f16 s3, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <8 x half> %src2, %src3
  %1 = fsub <8 x half> %src1, %0
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <8 x half> @vfmar16(<8 x half> %src1, <8 x half> %src2, float %src3o) {
; CHECK-MVE-FP-LABEL: vfmar16:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8
; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, r0
; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfmar16:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfmar16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: vmovx.f16 s10, s4
; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8
; CHECK-MVE-NEXT: vmla.f16 s12, s10, s8
; CHECK-MVE-NEXT: vmovx.f16 s10, s1
; CHECK-MVE-NEXT: vmovx.f16 s4, s5
; CHECK-MVE-NEXT: vmla.f16 s1, s5, s8
; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8
; CHECK-MVE-NEXT: vmovx.f16 s4, s6
; CHECK-MVE-NEXT: vins.f16 s1, s10
; CHECK-MVE-NEXT: vmovx.f16 s10, s2
; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8
; CHECK-MVE-NEXT: vmla.f16 s2, s6, s8
; CHECK-MVE-NEXT: vmovx.f16 s6, s3
; CHECK-MVE-NEXT: vmovx.f16 s4, s7
; CHECK-MVE-NEXT: vmla.f16 s6, s4, s8
; CHECK-MVE-NEXT: vmla.f16 s3, s7, s8
; CHECK-MVE-NEXT: vins.f16 s0, s12
; CHECK-MVE-NEXT: vins.f16 s2, s10
; CHECK-MVE-NEXT: vins.f16 s3, s6
; CHECK-MVE-NEXT: bx lr
entry:
  %src3 = fptrunc float %src3o to half
  %i = insertelement <8 x half> undef, half %src3, i32 0
  %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = fmul <8 x half> %src2, %sp
  %1 = fadd <8 x half> %src1, %0
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <8 x half> @vfma16(<8 x half> %src1, <8 x half> %src2, float %src3o) {
; CHECK-MVE-FP-LABEL: vfma16:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-FP-NEXT: vmul.f16 q0, q0, q1
; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8
; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, r0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma16:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
; CHECK-MVE-VMLA-NEXT: vfmas.f16 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmov q3, q0
; CHECK-MVE-NEXT: vcvtb.f16.f32 s3, s8
; CHECK-MVE-NEXT: vmovx.f16 s0, s4
; CHECK-MVE-NEXT: vmovx.f16 s2, s12
; CHECK-MVE-NEXT: vmov.f32 s8, s3
; CHECK-MVE-NEXT: vmla.f16 s8, s2, s0
; CHECK-MVE-NEXT: vmov.f32 s0, s3
; CHECK-MVE-NEXT: vmla.f16 s0, s12, s4
; CHECK-MVE-NEXT: vmov.f32 s1, s3
; CHECK-MVE-NEXT: vins.f16 s0, s8
; CHECK-MVE-NEXT: vmovx.f16 s2, s5
; CHECK-MVE-NEXT: vmovx.f16 s4, s13
; CHECK-MVE-NEXT: vmov.f32 s8, s3
; CHECK-MVE-NEXT: vmla.f16 s8, s4, s2
; CHECK-MVE-NEXT: vmla.f16 s1, s13, s5
; CHECK-MVE-NEXT: vins.f16 s1, s8
; CHECK-MVE-NEXT: vmovx.f16 s2, s6
; CHECK-MVE-NEXT: vmovx.f16 s4, s14
; CHECK-MVE-NEXT: vmov.f32 s8, s3
; CHECK-MVE-NEXT: vmla.f16 s8, s4, s2
; CHECK-MVE-NEXT: vmov.f32 s2, s3
; CHECK-MVE-NEXT: vmla.f16 s2, s14, s6
; CHECK-MVE-NEXT: vmovx.f16 s4, s7
; CHECK-MVE-NEXT: vins.f16 s2, s8
; CHECK-MVE-NEXT: vmov.f32 s8, s3
; CHECK-MVE-NEXT: vmovx.f16 s6, s15
; CHECK-MVE-NEXT: vmla.f16 s3, s15, s7
; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4
; CHECK-MVE-NEXT: vins.f16 s3, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %src3 = fptrunc float %src3o to half
  %i = insertelement <8 x half> undef, half %src3, i32 0
  %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = fmul <8 x half> %src1, %src2
  %1 = fadd <8 x half> %sp, %0
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <4 x float> @vfma32_v1(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfma32_v1:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2
; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma32_v1:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma32_v1:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmla.f32 s3, s7, s11
; CHECK-MVE-NEXT: vmla.f32 s2, s6, s10
; CHECK-MVE-NEXT: vmla.f32 s1, s5, s9
; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fadd <4 x float> %src1, %0
  ret <4 x float> %1
}

define arm_aapcs_vfpcc <4 x float> @vfma32_v2(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfma32_v2:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2
; CHECK-MVE-FP-NEXT: vadd.f32 q0, q1, q0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma32_v2:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma32_v2:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmla.f32 s3, s7, s11
; CHECK-MVE-NEXT: vmla.f32 s2, s6, s10
; CHECK-MVE-NEXT: vmla.f32 s1, s5, s9
; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fadd <4 x float> %0, %src1
  ret <4 x float> %1
}

define arm_aapcs_vfpcc <4 x float> @vfms32(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfms32:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2
; CHECK-MVE-FP-NEXT: vsub.f32 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfms32:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vfms.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfms32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmls.f32 s3, s7, s11
; CHECK-MVE-NEXT: vmls.f32 s2, s6, s10
; CHECK-MVE-NEXT: vmls.f32 s1, s5, s9
; CHECK-MVE-NEXT: vmls.f32 s0, s4, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fsub <4 x float> %src1, %0
  ret <4 x float> %1
}

define arm_aapcs_vfpcc <4 x float> @vfmar32(<4 x float> %src1, <4 x float> %src2, float %src3) {
; CHECK-MVE-FP-LABEL: vfmar32:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmov r0, s8
; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, r0
; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfmar32:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vmov r0, s8
; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfmar32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmla.f32 s3, s7, s8
; CHECK-MVE-NEXT: vmla.f32 s2, s6, s8
; CHECK-MVE-NEXT: vmla.f32 s1, s5, s8
; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %i = insertelement <4 x float> undef, float %src3, i32 0
  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = fmul <4 x float> %src2, %sp
  %1 = fadd <4 x float> %src1, %0
  ret <4 x float> %1
}

define arm_aapcs_vfpcc <4 x float> @vfmas32(<4 x float> %src1, <4 x float> %src2, float %src3) {
; CHECK-MVE-FP-LABEL: vfmas32:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmov r0, s8
; CHECK-MVE-FP-NEXT: vmul.f32 q0, q0, q1
; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, r0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfmas32:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vmov r0, s8
; CHECK-MVE-VMLA-NEXT: vfmas.f32 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfmas32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmov.f32 s11, s8
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmov.f32 s9, s8
; CHECK-MVE-NEXT: vmla.f32 s8, s0, s4
; CHECK-MVE-NEXT: vmla.f32 s11, s3, s7
; CHECK-MVE-NEXT: vmla.f32 s10, s2, s6
; CHECK-MVE-NEXT: vmla.f32 s9, s1, s5
; CHECK-MVE-NEXT: vmov q0, q2
; CHECK-MVE-NEXT: bx lr
entry:
  %i = insertelement <4 x float> undef, float %src3, i32 0
  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = fmul <4 x float> %src1, %src2
  %1 = fadd <4 x float> %sp, %0
  ret <4 x float> %1
}


; Predicated version of the same tests

define arm_aapcs_vfpcc <8 x half> @vfma16_v1_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
; CHECK-MVE-FP-LABEL: vfma16_v1_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q0, q2
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma16_v1_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma16_v1_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s14, s4
; CHECK-MVE-NEXT: vmovx.f16 s13, s0
; CHECK-MVE-NEXT: vcmp.f16 s14, #0
; CHECK-MVE-NEXT: vmovx.f16 s12, s8
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s15, s13
; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: vmov.f32 s14, s0
; CHECK-MVE-NEXT: vmla.f16 s14, s4, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s5
; CHECK-MVE-NEXT: vmovx.f16 s4, s9
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s8, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vins.f16 s0, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s1
; CHECK-MVE-NEXT: vmov.f32 s14, s12
; CHECK-MVE-NEXT: vcmp.f16 s5, #0
; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4
; CHECK-MVE-NEXT: vmov.f32 s8, s1
; CHECK-MVE-NEXT: vmla.f16 s8, s5, s9
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s12
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
; CHECK-MVE-NEXT: vcmp.f16 s8, #0
; CHECK-MVE-NEXT: vins.f16 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s4, s10
; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4
; CHECK-MVE-NEXT: vcmp.f16 s6, #0
; CHECK-MVE-NEXT: vmov.f32 s8, s2
; CHECK-MVE-NEXT: vmla.f16 s8, s6, s10
; CHECK-MVE-NEXT: vmovx.f16 s6, s7
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s6, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s8, s3
; CHECK-MVE-NEXT: vins.f16 s2, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s11
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4
; CHECK-MVE-NEXT: vcmp.f16 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s6, s3
; CHECK-MVE-NEXT: vmla.f16 s6, s7, s11
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6
; CHECK-MVE-NEXT: vins.f16 s3, s4
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <8 x half> %src2, %src3
  %1 = fadd <8 x half> %src1, %0
  %c = fcmp olt <8 x half> %src2, zeroinitializer
  %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
  ret <8 x half> %s
}

define arm_aapcs_vfpcc <8 x half> @vfma16_v2_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
; CHECK-MVE-FP-LABEL: vfma16_v2_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q2, q0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma16_v2_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma16_v2_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s14, s4
; CHECK-MVE-NEXT: vmovx.f16 s13, s0
; CHECK-MVE-NEXT: vcmp.f16 s14, #0
; CHECK-MVE-NEXT: vmovx.f16 s12, s8
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s15, s13
; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: vmov.f32 s14, s0
; CHECK-MVE-NEXT: vmla.f16 s14, s4, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s5
; CHECK-MVE-NEXT: vmovx.f16 s4, s9
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s8, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vins.f16 s0, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s1
; CHECK-MVE-NEXT: vmov.f32 s14, s12
; CHECK-MVE-NEXT: vcmp.f16 s5, #0
; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4
; CHECK-MVE-NEXT: vmov.f32 s8, s1
; CHECK-MVE-NEXT: vmla.f16 s8, s5, s9
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s12
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
; CHECK-MVE-NEXT: vcmp.f16 s8, #0
; CHECK-MVE-NEXT: vins.f16 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s4, s10
; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4
; CHECK-MVE-NEXT: vcmp.f16 s6, #0
; CHECK-MVE-NEXT: vmov.f32 s8, s2
; CHECK-MVE-NEXT: vmla.f16 s8, s6, s10
; CHECK-MVE-NEXT: vmovx.f16 s6, s7
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s6, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s8, s3
; CHECK-MVE-NEXT: vins.f16 s2, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s11
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4
; CHECK-MVE-NEXT: vcmp.f16 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s6, s3
; CHECK-MVE-NEXT: vmla.f16 s6, s7, s11
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6
; CHECK-MVE-NEXT: vins.f16 s3, s4
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <8 x half> %src2, %src3
  %1 = fadd <8 x half> %0, %src1
  %c = fcmp olt <8 x half> %src2, zeroinitializer
  %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
  ret <8 x half> %s
}

define arm_aapcs_vfpcc <8 x half> @vfms16_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
; CHECK-MVE-FP-LABEL: vfms16_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-FP-NEXT: vsubt.f16 q0, q0, q2
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfms16_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmst.f16 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfms16_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s14, s4
; CHECK-MVE-NEXT: vmovx.f16 s13, s0
; CHECK-MVE-NEXT: vcmp.f16 s14, #0
; CHECK-MVE-NEXT: vmovx.f16 s12, s8
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s15, s13
; CHECK-MVE-NEXT: vmls.f16 s15, s14, s12
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: vmov.f32 s14, s0
; CHECK-MVE-NEXT: vmls.f16 s14, s4, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s5
; CHECK-MVE-NEXT: vmovx.f16 s4, s9
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s8, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vins.f16 s0, s12
; CHECK-MVE-NEXT: vmovx.f16 s12, s1
; CHECK-MVE-NEXT: vmov.f32 s14, s12
; CHECK-MVE-NEXT: vcmp.f16 s5, #0
; CHECK-MVE-NEXT: vmls.f16 s14, s8, s4
; CHECK-MVE-NEXT: vmov.f32 s8, s1
; CHECK-MVE-NEXT: vmls.f16 s8, s5, s9
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s12
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8
; CHECK-MVE-NEXT: vmovx.f16 s8, s6
; CHECK-MVE-NEXT: vcmp.f16 s8, #0
; CHECK-MVE-NEXT: vins.f16 s1, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s4, s10
; CHECK-MVE-NEXT: vmls.f16 s14, s8, s4
; CHECK-MVE-NEXT: vcmp.f16 s6, #0
; CHECK-MVE-NEXT: vmov.f32 s8, s2
; CHECK-MVE-NEXT: vmls.f16 s8, s6, s10
; CHECK-MVE-NEXT: vmovx.f16 s6, s7
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s6, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmovx.f16 s8, s3
; CHECK-MVE-NEXT: vins.f16 s2, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s11
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmls.f16 s10, s6, s4
; CHECK-MVE-NEXT: vcmp.f16 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s6, s3
; CHECK-MVE-NEXT: vmls.f16 s6, s7, s11
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6
; CHECK-MVE-NEXT: vins.f16 s3, s4
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <8 x half> %src2, %src3
  %1 = fsub <8 x half> %src1, %0
  %c = fcmp olt <8 x half> %src2, zeroinitializer
  %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
  ret <8 x half> %s
}

define arm_aapcs_vfpcc <8 x half> @vfmar16_pred(<8 x half> %src1, <8 x half> %src2, float %src3o) {
; CHECK-MVE-FP-LABEL: vfmar16_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-FP-NEXT: vcmp.f16 lt, q1, zr
; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8
; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, r0
; CHECK-MVE-FP-NEXT: vpst
; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfmar16_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfmar16_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s10, s4
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: vcmp.f16 s10, #0
; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s14, s12
; CHECK-MVE-NEXT: vmla.f16 s14, s10, s8
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s10, s12, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s12, s0
; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8
; CHECK-MVE-NEXT: vmovx.f16 s4, s5
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vins.f16 s0, s10
; CHECK-MVE-NEXT: vmovx.f16 s10, s1
; CHECK-MVE-NEXT: vmov.f32 s12, s10
; CHECK-MVE-NEXT: vcmp.f16 s5, #0
; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s10, s1
; CHECK-MVE-NEXT: vmla.f16 s10, s5, s8
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s10
; CHECK-MVE-NEXT: vmovx.f16 s10, s2
; CHECK-MVE-NEXT: vins.f16 s1, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s6
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: vmov.f32 s12, s10
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8
; CHECK-MVE-NEXT: vcmp.f16 s6, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s10, s2
; CHECK-MVE-NEXT: vmla.f16 s10, s6, s8
; CHECK-MVE-NEXT: vmovx.f16 s6, s3
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s6
; CHECK-MVE-NEXT: vins.f16 s2, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s7
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s7, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s10
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s6, s3
; CHECK-MVE-NEXT: vmla.f16 s6, s7, s8
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6
; CHECK-MVE-NEXT: vins.f16 s3, s4
; CHECK-MVE-NEXT: bx lr
entry:
  %src3 = fptrunc float %src3o to half
  %i = insertelement <8 x half> undef, half %src3, i32 0
  %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = fmul <8 x half> %src2, %sp
  %1 = fadd <8 x half> %src1, %0
  %c = fcmp olt <8 x half> %src2, zeroinitializer
  %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
  ret <8 x half> %s
}

define arm_aapcs_vfpcc <8 x half> @vfma16_pred(<8 x half> %src1, <8 x half> %src2, float %src3o) {
; CHECK-MVE-FP-LABEL: vfma16_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8
; CHECK-MVE-FP-NEXT: vmul.f16 q2, q0, q1
; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q2, r0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma16_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmast.f16 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma16_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmovx.f16 s10, s4
; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-MVE-NEXT: vcmp.f16 s10, #0
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s14, s8
; CHECK-MVE-NEXT: vmla.f16 s14, s12, s10
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s10, s12, s14
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s12, s8
; CHECK-MVE-NEXT: vmla.f16 s12, s0, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s5
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vins.f16 s0, s10
; CHECK-MVE-NEXT: vmovx.f16 s10, s1
; CHECK-MVE-NEXT: vmov.f32 s12, s8
; CHECK-MVE-NEXT: vcmp.f16 s5, #0
; CHECK-MVE-NEXT: vmla.f16 s12, s10, s4
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmla.f16 s10, s1, s5
; CHECK-MVE-NEXT: vmov.f32 s12, s8
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s10
; CHECK-MVE-NEXT: vmovx.f16 s10, s2
; CHECK-MVE-NEXT: vins.f16 s1, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s6
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: vmla.f16 s12, s10, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s6, #0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmla.f16 s10, s2, s6
; CHECK-MVE-NEXT: vmovx.f16 s6, s3
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vins.f16 s2, s4
; CHECK-MVE-NEXT: vmovx.f16 s4, s7
; CHECK-MVE-NEXT: vcmp.f16 s4, #0
; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f16 s7, #0
; CHECK-MVE-NEXT: vmla.f16 s8, s3, s7
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s10
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s8
; CHECK-MVE-NEXT: vins.f16 s3, s4
; CHECK-MVE-NEXT: bx lr
entry:
  %src3 = fptrunc float %src3o to half
  %i = insertelement <8 x half> undef, half %src3, i32 0
  %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = fmul <8 x half> %src1, %src2
  %1 = fadd <8 x half> %sp, %0
  %c = fcmp olt <8 x half> %src2, zeroinitializer
  %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
  ret <8 x half> %s
}

define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfma32_v1_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q0, q2
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma32_v1_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma32_v1_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s3
; CHECK-MVE-NEXT: vmla.f32 s12, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
; CHECK-MVE-NEXT: vmla.f32 s14, s7, s11
; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fadd <4 x float> %src1, %0
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}

define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfma32_v2_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q2, q0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfma32_v2_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfma32_v2_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s3
; CHECK-MVE-NEXT: vmla.f32 s12, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
; CHECK-MVE-NEXT: vmla.f32 s14, s7, s11
; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fadd <4 x float> %0, %src1
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}

define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
; CHECK-MVE-FP-LABEL: vfms32_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vsubt.f32 q0, q0, q2
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfms32_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmst.f32 q0, q1, q2
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfms32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s12, s2
; CHECK-MVE-NEXT: vmov.f32 s14, s3
; CHECK-MVE-NEXT: vmls.f32 s12, s6, s10
; CHECK-MVE-NEXT: vmov.f32 s10, s1
; CHECK-MVE-NEXT: vmls.f32 s14, s7, s11
; CHECK-MVE-NEXT: vmls.f32 s10, s5, s9
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: vmls.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
  %0 = fmul <4 x float> %src2, %src3
  %1 = fsub <4 x float> %src1, %0
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}

define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float> %src2, float %src3) {
; CHECK-MVE-FP-LABEL: vfmar32_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmov r0, s8
; CHECK-MVE-FP-NEXT: vcmp.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, r0
; CHECK-MVE-FP-NEXT: vpst
; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q0, q1
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfmar32_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vmov r0, s8
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfmar32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s10, s2
; CHECK-MVE-NEXT: vmov.f32 s12, s1
; CHECK-MVE-NEXT: vmov.f32 s14, s3
; CHECK-MVE-NEXT: vmov.f32 s9, s0
; CHECK-MVE-NEXT: vmla.f32 s10, s6, s8
; CHECK-MVE-NEXT: vmla.f32 s12, s5, s8
; CHECK-MVE-NEXT: vmla.f32 s14, s7, s8
; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s10
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9
; CHECK-MVE-NEXT: bx lr
entry:
  %i = insertelement <4 x float> undef, float %src3, i32 0
  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = fmul <4 x float> %src2, %sp
  %1 = fadd <4 x float> %src1, %0
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}

define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float> %src2, float %src3) {
; CHECK-MVE-FP-LABEL: vfmas32_pred:
; CHECK-MVE-FP: @ %bb.0: @ %entry
; CHECK-MVE-FP-NEXT: vmov r0, s8
; CHECK-MVE-FP-NEXT: vmul.f32 q2, q0, q1
; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q2, r0
; CHECK-MVE-FP-NEXT: bx lr
;
; CHECK-MVE-VMLA-LABEL: vfmas32_pred:
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
; CHECK-MVE-VMLA-NEXT: vmov r0, s8
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
; CHECK-MVE-VMLA-NEXT: vfmast.f32 q0, q1, r0
; CHECK-MVE-VMLA-NEXT: bx lr
;
; CHECK-MVE-LABEL: vfmas32_pred:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s4, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s7, #0
; CHECK-MVE-NEXT: vmov.f32 s10, s8
; CHECK-MVE-NEXT: vmov.f32 s12, s8
; CHECK-MVE-NEXT: vmov.f32 s14, s8
; CHECK-MVE-NEXT: vmla.f32 s8, s0, s4
; CHECK-MVE-NEXT: vmla.f32 s10, s2, s6
; CHECK-MVE-NEXT: vmla.f32 s12, s1, s5
; CHECK-MVE-NEXT: vmla.f32 s14, s3, s7
; CHECK-MVE-NEXT: cset r0, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s5, #0
; CHECK-MVE-NEXT: cset r1, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s6, #0
; CHECK-MVE-NEXT: cset r2, mi
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: cset r3, mi
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s10
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s8
; CHECK-MVE-NEXT: bx lr
entry:
  %i = insertelement <4 x float> undef, float %src3, i32 0
  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = fmul <4 x float> %src1, %src2
  %1 = fadd <4 x float> %sp, %0
  %c = fcmp olt <4 x float> %src2, zeroinitializer
  %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
  ret <4 x float> %s
}