InstCombine will hoist an fneg through an fmul, but not for fadd/fsub.
This prevents us from matching fmsub and fnmadd in some cases.
This patch adds a DAG combine that undoes this InstCombine transform, which
helps some hot loops in 508.namd_r:
@@ -983,18 +983,15 @@
fld ft2, 48(a5)
fld ft3, 64(a5)
fld ft4, 72(a5)
- fneg.d fa0, fa0
- fneg.d ft0, ft0
- fneg.d ft2, ft2
fmul.d fa3, ft5, fa3
fmul.d fa0, fa3, fa0
fmul.d ft0, fa3, ft0
fmul.d fa3, fa3, ft2
fld ft2, 0(s1)
fmul.d fa4, ft5, fa4
- fmadd.d fa2, fa4, fa2, fa0
- fmadd.d ft6, fa4, ft6, ft0
- fmadd.d fa4, fa4, ft1, fa3
+ fmsub.d fa2, fa4, fa2, fa0
+ fmsub.d ft6, fa4, ft6, ft0
+ fmsub.d fa4, fa4, ft1, fa3
This gives a [1.77% improvement in both instruction count and runtime on
508.namd_r](https://lnt.lukelau.me/db_default/v4/nts/profile/1/1022/1021)
This also causes some more fnegs to be sunk after a bitcast to integer,
so they're now done as xor. From glancing at some of the schedules for
WriteFSGN my guess is that this is also profitable.
1407 lines
44 KiB
LLVM
1407 lines
44 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
|
|
; RUN: -target-abi=ilp32f | FileCheck -check-prefix=CHECKIF %s
|
|
; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
|
|
; RUN: -target-abi=lp64f | FileCheck -check-prefix=CHECKIF %s
|
|
; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s \
|
|
; RUN: -target-abi=ilp32 | FileCheck -check-prefixes=CHECKIZFINX,RV32IZFINX %s
|
|
; RUN: llc -mtriple=riscv64 -mattr=+zfinx -verify-machineinstrs < %s \
|
|
; RUN: -target-abi=lp64 | FileCheck -check-prefixes=CHECKIZFINX,RV64IZFINX %s
|
|
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
|
|
; RUN: | FileCheck -check-prefix=RV32I %s
|
|
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
|
|
; RUN: | FileCheck -check-prefix=RV64I %s
|
|
|
|
; These tests are each targeted at a particular RISC-V FPU instruction.
|
|
; Compares and conversions can be found in float-fcmp.ll and float-convert.ll
|
|
; respectively. Some other float-*.ll files in this folder exercise LLVM IR
|
|
; instructions that don't directly match a RISC-V instruction.
|
|
|
|
; Single-precision add. The checks below expect one fadd.s when hardware FP
; (or Zfinx) is available and a call to __addsf3 for the soft-float prefixes.
define float @fadd_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fadd_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fadd.s fa0, fa0, fa1
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fadd_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fadd.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fadd_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fadd_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %sum = fadd float %a, %b
  ret float %sum
}
|
|
|
|
; Single-precision subtract. The checks below expect one fsub.s with hardware
; FP (or Zfinx) and a call to __subsf3 for the soft-float prefixes.
define float @fsub_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fsub_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fsub.s fa0, fa0, fa1
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fsub_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fsub.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fsub_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call __subsf3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fsub_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __subsf3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %diff = fsub float %a, %b
  ret float %diff
}
|
|
|
|
; Single-precision multiply. The checks below expect one fmul.s with hardware
; FP (or Zfinx) and a call to __mulsf3 for the soft-float prefixes.
define float @fmul_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fmul_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmul.s fa0, fa0, fa1
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fmul_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fmul.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fmul_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call __mulsf3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fmul_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __mulsf3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %prod = fmul float %a, %b
  ret float %prod
}
|
|
|
|
; Single-precision divide. The checks below expect one fdiv.s with hardware
; FP (or Zfinx) and a call to __divsf3 for the soft-float prefixes.
define float @fdiv_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fdiv_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fdiv.s fa0, fa0, fa1
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fdiv_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fdiv.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fdiv_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call __divsf3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fdiv_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __divsf3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %quot = fdiv float %a, %b
  ret float %quot
}
|
|
|
|
declare float @llvm.sqrt.f32(float)
|
|
|
|
; Square root via the llvm.sqrt.f32 intrinsic. The checks below expect one
; fsqrt.s with hardware FP (or Zfinx) and a call to sqrtf for the soft-float
; prefixes.
define float @fsqrt_s(float %a) nounwind {
; CHECKIF-LABEL: fsqrt_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fsqrt.s fa0, fa0
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fsqrt_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fsqrt.s a0, a0
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fsqrt_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call sqrtf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fsqrt_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call sqrtf
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %root = call float @llvm.sqrt.f32(float %a)
  ret float %root
}
|
|
|
|
declare float @llvm.copysign.f32(float, float)
|
|
|
|
; copysign(%a, %b) via the llvm.copysign.f32 intrinsic. The checks below
; expect one fsgnj.s with hardware FP (or Zfinx); the soft-float prefixes
; expect an inline shift/and/or sequence with no libcall.
define float @fsgnj_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fsgnj_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fsgnj.s fa0, fa0, fa1
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fsgnj_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fsgnj.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fsgnj_s:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a2, 524288
; RV32I-NEXT: slli a0, a0, 1
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: srli a0, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: fsgnj_s:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: slli a0, a0, 33
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a0, a0, 33
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
  %signed = call float @llvm.copysign.f32(float %a, float %b)
  ret float %signed
}
|
|
|
|
; fneg of a computed value (%a + %a), compared for ordered equality against
; the un-negated sum and zero-extended to i32. The checks below expect an
; fneg.s feeding feq.s with hardware FP (or Zfinx); the soft-float prefixes
; expect an xor against lui 524288 and a call to __eqsf2.
define i32 @fneg_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fneg_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fadd.s fa5, fa0, fa0
; CHECKIF-NEXT: fneg.s fa4, fa5
; CHECKIF-NEXT: feq.s a0, fa5, fa4
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fneg_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fadd.s a0, a0, a0
; CHECKIZFINX-NEXT: fneg.s a1, a0
; CHECKIZFINX-NEXT: feq.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fneg_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: lui a1, 524288
; RV32I-NEXT: xor a1, a0, a1
; RV32I-NEXT: call __eqsf2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fneg_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: lui a1, 524288
; RV64I-NEXT: xor a1, a0, a1
; RV64I-NEXT: call __eqsf2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %sum = fadd float %a, %a
  %neg = fneg float %sum
  %iseq = fcmp oeq float %sum, %neg
  %ext = zext i1 %iseq to i32
  ret i32 %ext
}
|
|
|
|
; copysign(%a, -(%a + %b)). The checks below expect the negation to be
; absorbed into a single fsgnjn.s with hardware FP (or Zfinx); the soft-float
; prefixes expect a `not` on the __addsf3 result followed by inline sign-bit
; masking.
define float @fsgnjn_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fsgnjn_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fadd.s fa5, fa0, fa1
; CHECKIF-NEXT: fsgnjn.s fa0, fa0, fa5
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fsgnjn_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fadd.s a1, a0, a1
; CHECKIZFINX-NEXT: fsgnjn.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fsgnjn_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: lui a1, 524288
; RV32I-NEXT: slli s0, s0, 1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: srli s0, s0, 1
; RV32I-NEXT: or a0, s0, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fsgnjn_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: lui a1, 524288
; RV64I-NEXT: slli s0, s0, 33
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: srli s0, s0, 33
; RV64I-NEXT: or a0, s0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %sum = fadd float %a, %b
  %neg = fneg float %sum
  %signed = call float @llvm.copysign.f32(float %a, float %neg)
  ret float %signed
}
|
|
|
|
declare float @llvm.fabs.f32(float)
|
|
|
|
; fabs(%a + %b) + (%a + %b). The checks below expect an fabs.s between two
; fadd.s with hardware FP (or Zfinx); the soft-float prefixes expect the sign
; bit to be cleared with a slli/srli pair between two __addsf3 calls.
define float @fabs_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fabs_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fadd.s fa5, fa0, fa1
; CHECKIF-NEXT: fabs.s fa4, fa5
; CHECKIF-NEXT: fadd.s fa0, fa4, fa5
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fabs_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fadd.s a0, a0, a1
; CHECKIZFINX-NEXT: fabs.s a1, a0
; CHECKIZFINX-NEXT: fadd.s a0, a1, a0
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fabs_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: slli a0, a0, 1
; RV32I-NEXT: srli a0, a0, 1
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fabs_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: slli a0, a0, 33
; RV64I-NEXT: srli a0, a0, 33
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %sum = fadd float %a, %b
  %abs = call float @llvm.fabs.f32(float %sum)
  %res = fadd float %abs, %sum
  ret float %res
}
|
|
|
|
declare float @llvm.minnum.f32(float, float)
|
|
|
|
; Minimum via the llvm.minnum.f32 intrinsic. The checks below expect one
; fmin.s with hardware FP (or Zfinx) and a call to fminf for the soft-float
; prefixes.
define float @fmin_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fmin_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmin.s fa0, fa0, fa1
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fmin_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fmin.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fmin_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call fminf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fmin_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call fminf
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %min = call float @llvm.minnum.f32(float %a, float %b)
  ret float %min
}
|
|
|
|
declare float @llvm.maxnum.f32(float, float)
|
|
|
|
; Maximum via the llvm.maxnum.f32 intrinsic. The checks below expect one
; fmax.s with hardware FP (or Zfinx) and a call to fmaxf for the soft-float
; prefixes.
define float @fmax_s(float %a, float %b) nounwind {
; CHECKIF-LABEL: fmax_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmax.s fa0, fa0, fa1
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fmax_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fmax.s a0, a0, a1
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fmax_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call fmaxf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fmax_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call fmaxf
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  ret float %max
}
|
|
|
|
declare float @llvm.fma.f32(float, float, float)
|
|
|
|
; Plain fused multiply-add via llvm.fma.f32. The checks below expect one
; fmadd.s with hardware FP (or Zfinx) and a call to fmaf for the soft-float
; prefixes.
define float @fmadd_s(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fmadd_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmadd.s fa0, fa0, fa1, fa2
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fmadd_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fmadd.s a0, a0, a1, a2
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fmadd_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call fmaf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fmadd_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call fmaf
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  ret float %fma
}
|
|
|
|
; fma(%a, %b, -(%c + 0.0)). The checks below expect the negated addend to be
; folded into a single fmsub.s with hardware FP (or Zfinx); the soft-float
; prefixes expect an xor of the sign bit ahead of the fmaf call.
define float @fmsub_s(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fmsub_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmv.w.x fa5, zero
; CHECKIF-NEXT: fadd.s fa5, fa2, fa5
; CHECKIF-NEXT: fmsub.s fa0, fa0, fa1, fa5
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fmsub_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fadd.s a2, a2, zero
; CHECKIZFINX-NEXT: fmsub.s a0, a0, a1, a2
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fmsub_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: lui a2, 524288
; RV32I-NEXT: xor a2, a0, a2
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call fmaf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fmsub_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: xor a2, a0, a2
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call fmaf
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
  %c.add = fadd float 0.0, %c ; avoid negation using xor
  %c.neg = fsub float -0.0, %c.add
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c.neg)
  ret float %fma
}
|
|
|
|
; fma(%a, %b, %c * -%d): the addend is an fmul whose operand is negated.
; The checks below expect the hardware-FP and Zfinx configurations to emit a
; plain fmul.s followed by fmsub.s, with no separate fneg.s. The soft-float
; prefixes expect the sign-bit xor to be applied to %d ahead of the __mulsf3
; call.
define float @fmsub_s_fmul_fneg(float %a, float %b, float %c, float %d) nounwind {
; CHECKIF-LABEL: fmsub_s_fmul_fneg:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmul.s fa5, fa2, fa3
; CHECKIF-NEXT: fmsub.s fa0, fa0, fa1, fa5
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fmsub_s_fmul_fneg:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fmul.s a2, a2, a3
; CHECKIZFINX-NEXT: fmsub.s a0, a0, a1, a2
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fmsub_s_fmul_fneg:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a1, 524288
; RV32I-NEXT: xor a1, a3, a1
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __mulsf3
; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call fmaf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fmsub_s_fmul_fneg:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a1, 524288
; RV64I-NEXT: xor a1, a3, a1
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __mulsf3
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call fmaf
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
  %negd = fneg float %d
  %fmul = fmul float %c, %negd
  %1 = call float @llvm.fma.f32(float %a, float %b, float %fmul)
  ret float %1
}
|
|
|
|
; fma(-(%a + 0.0), %b, -(%c + 0.0)): first multiplicand and addend negated.
; The checks below expect both negations to be folded into a single fnmadd.s
; with hardware FP (or Zfinx); the soft-float prefixes expect two sign-bit
; xors ahead of the fmaf call.
define float @fnmadd_s(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fnmadd_s:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmv.w.x fa5, zero
; CHECKIF-NEXT: fadd.s fa4, fa0, fa5
; CHECKIF-NEXT: fadd.s fa5, fa2, fa5
; CHECKIF-NEXT: fnmadd.s fa0, fa4, fa1, fa5
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fnmadd_s:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fadd.s a0, a0, zero
; CHECKIZFINX-NEXT: fadd.s a2, a2, zero
; CHECKIZFINX-NEXT: fnmadd.s a0, a0, a1, a2
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_s:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a2
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: lui a2, 524288
; RV32I-NEXT: xor a1, s2, a2
; RV32I-NEXT: xor a2, a0, a2
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call fmaf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fnmadd_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a2
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: xor a1, s2, a2
; RV64I-NEXT: xor a2, a0, a2
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call fmaf
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
  %a.add = fadd float 0.0, %a
  %c.add = fadd float 0.0, %c
  %a.neg = fsub float -0.0, %a.add
  %c.neg = fsub float -0.0, %c.add
  %fma = call float @llvm.fma.f32(float %a.neg, float %b, float %c.neg)
  ret float %fma
}
|
|
|
|
; fma(%a, -(%b + 0.0), -(%c + 0.0)): like fnmadd_s, but the negated
; multiplicand is the second fma operand. The checks below still expect a
; single fnmadd.s with hardware FP (or Zfinx), with the multiplicands listed
; in swapped order.
define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fnmadd_s_2:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmv.w.x fa5, zero
; CHECKIF-NEXT: fadd.s fa4, fa1, fa5
; CHECKIF-NEXT: fadd.s fa5, fa2, fa5
; CHECKIF-NEXT: fnmadd.s fa0, fa4, fa0, fa5
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fnmadd_s_2:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fadd.s a1, a1, zero
; CHECKIZFINX-NEXT: fadd.s a2, a2, zero
; CHECKIZFINX-NEXT: fnmadd.s a0, a1, a0, a2
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_s_2:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a2
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __addsf3
; RV32I-NEXT: lui a2, 524288
; RV32I-NEXT: xor a1, s2, a2
; RV32I-NEXT: xor a2, a0, a2
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call fmaf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fnmadd_s_2:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a2
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __addsf3
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: xor a1, s2, a2
; RV64I-NEXT: xor a2, a0, a2
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call fmaf
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
  %b.add = fadd float 0.0, %b
  %c.add = fadd float 0.0, %c
  %b.neg = fsub float -0.0, %b.add
  %c.neg = fsub float -0.0, %c.add
  %fma = call float @llvm.fma.f32(float %a, float %b.neg, float %c.neg)
  ret float %fma
}
|
|
|
|
; -(fma(%a, %b, %c)) with no fast-math flags. The checks below expect the
; hardware-FP and Zfinx configurations to keep a separate fmadd.s and fneg.s
; rather than a single fnmadd.s. The soft-float prefixes expect a sign-bit
; xor on the fmaf result.
define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fnmadd_s_3:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmadd.s fa5, fa0, fa1, fa2
; CHECKIF-NEXT: fneg.s fa0, fa5
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fnmadd_s_3:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fmadd.s a0, a0, a1, a2
; CHECKIZFINX-NEXT: fneg.s a0, a0
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_s_3:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call fmaf
; RV32I-NEXT: lui a1, 524288
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fnmadd_s_3:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call fmaf
; RV64I-NEXT: lui a1, 524288
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
  %neg = fneg float %1
  ret float %neg
}
|
|
|
|
; fma(%c, %d, %b * -%a): the addend is an fmul whose operand is negated.
; The checks below expect the hardware-FP and Zfinx configurations to emit a
; plain fmul.s followed by fmsub.s, with no separate fneg.s. The soft-float
; prefixes expect the sign-bit xor to be applied to %a ahead of the __mulsf3
; call.
define float @fnmadd_s_fmul_fneg(float %a, float %b, float %c, float %d) nounwind {
; CHECKIF-LABEL: fnmadd_s_fmul_fneg:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fmul.s fa5, fa1, fa0
; CHECKIF-NEXT: fmsub.s fa0, fa2, fa3, fa5
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fnmadd_s_fmul_fneg:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fmul.s a0, a1, a0
; CHECKIZFINX-NEXT: fmsub.s a0, a2, a3, a0
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_s_fmul_fneg:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a3
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv a2, a1
; RV32I-NEXT: lui a1, 524288
; RV32I-NEXT: xor a1, a0, a1
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __mulsf3
; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call fmaf
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fnmadd_s_fmul_fneg:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a3
; RV64I-NEXT: mv s1, a2
; RV64I-NEXT: mv a2, a1
; RV64I-NEXT: lui a1, 524288
; RV64I-NEXT: xor a1, a0, a1
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __mulsf3
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call fmaf
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
  %nega = fneg float %a
  %mul = fmul float %b, %nega
  %1 = call float @llvm.fma.f32(float %c, float %d, float %mul)
  ret float %1
}
|
|
|
|
; -(fma(%a, %b, %c)) where both the fma call and the fneg carry the nsz
; flag. The checks below expect the hardware-FP and Zfinx configurations to
; emit a single fnmadd.s. The soft-float prefixes expect a sign-bit xor on
; the fmaf result.
define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fnmadd_nsz:
; CHECKIF: # %bb.0:
; CHECKIF-NEXT: fnmadd.s fa0, fa0, fa1, fa2
; CHECKIF-NEXT: ret
;
; CHECKIZFINX-LABEL: fnmadd_nsz:
; CHECKIZFINX: # %bb.0:
; CHECKIZFINX-NEXT: fnmadd.s a0, a0, a1, a2
; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_nsz:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: call fmaf
; RV32I-NEXT: lui a1, 524288
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fnmadd_nsz:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call fmaf
; RV64I-NEXT: lui a1, 524288
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
  %1 = call nsz float @llvm.fma.f32(float %a, float %b, float %c)
  %neg = fneg nsz float %1
  ret float %neg
}
|
|
|
|
; llvm.fma whose first multiplicand is negated (written as fsub -0.0, x). The F
; and Zfinx configurations are expected to select fnmsub.s. The operand is first
; passed through an fadd with 0.0; presumably this keeps the negation from being
; lowered as an integer xor on the incoming argument (other tests in this file
; annotate the same idiom as "avoid negation using xor").
define float @fnmsub_s(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fnmsub_s:
; CHECKIF:       # %bb.0:
; CHECKIF-NEXT:    fmv.w.x fa5, zero
; CHECKIF-NEXT:    fadd.s fa5, fa0, fa5
; CHECKIF-NEXT:    fnmsub.s fa0, fa5, fa1, fa2
; CHECKIF-NEXT:    ret
;
; CHECKIZFINX-LABEL: fnmsub_s:
; CHECKIZFINX:       # %bb.0:
; CHECKIZFINX-NEXT:    fadd.s a0, a0, zero
; CHECKIZFINX-NEXT:    fnmsub.s a0, a0, a1, a2
; CHECKIZFINX-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    mv a1, s1
; RV32I-NEXT:    mv a2, s0
; RV32I-NEXT:    call fmaf
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    mv a1, s1
; RV64I-NEXT:    mv a2, s0
; RV64I-NEXT:    call fmaf
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %a_ = fadd float 0.0, %a
  %nega = fsub float -0.0, %a_
  %1 = call float @llvm.fma.f32(float %nega, float %b, float %c)
  ret float %1
}
|
|
|
|
; Variant of the fnmsub test in which the *second* multiplicand of llvm.fma is
; the negated one (fsub -0.0, x). The F and Zfinx configurations are still
; expected to select fnmsub.s, with the multiplicands commuted so that the
; formerly negated value becomes the first source operand. The fadd with 0.0
; presumably keeps the negation from being lowered as an integer xor on the
; incoming argument.
define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fnmsub_s_2:
; CHECKIF:       # %bb.0:
; CHECKIF-NEXT:    fmv.w.x fa5, zero
; CHECKIF-NEXT:    fadd.s fa5, fa1, fa5
; CHECKIF-NEXT:    fnmsub.s fa0, fa5, fa0, fa2
; CHECKIF-NEXT:    ret
;
; CHECKIZFINX-LABEL: fnmsub_s_2:
; CHECKIZFINX:       # %bb.0:
; CHECKIZFINX-NEXT:    fadd.s a1, a1, zero
; CHECKIZFINX-NEXT:    fnmsub.s a0, a1, a0, a2
; CHECKIZFINX-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s_2:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a1, a0, a1
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a2, s0
; RV32I-NEXT:    call fmaf
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a1, a0, a1
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    mv a2, s0
; RV64I-NEXT:    call fmaf
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %b_ = fadd float 0.0, %b
  %negb = fsub float -0.0, %b_
  %1 = call float @llvm.fma.f32(float %a, float %negb, float %c)
  ret float %1
}
|
|
|
|
; Separate fmul and fadd, both carrying the contract flag. The F and Zfinx
; configurations are expected to fuse them into a single fmadd.s; the soft-float
; configurations keep two libcalls (__mulsf3 followed by __addsf3).
define float @fmadd_s_contract(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fmadd_s_contract:
; CHECKIF:       # %bb.0:
; CHECKIF-NEXT:    fmadd.s fa0, fa0, fa1, fa2
; CHECKIF-NEXT:    ret
;
; CHECKIZFINX-LABEL: fmadd_s_contract:
; CHECKIZFINX:       # %bb.0:
; CHECKIZFINX-NEXT:    fmadd.s a0, a0, a1, a2
; CHECKIZFINX-NEXT:    ret
;
; RV32I-LABEL: fmadd_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    call __mulsf3
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmadd_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    call __mulsf3
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fmul contract float %a, %b
  %2 = fadd contract float %1, %c
  ret float %2
}
|
|
|
|
; fmul followed by an fsub of %c_ from the product, both carrying the contract
; flag. The F and Zfinx configurations are expected to fuse them into fmsub.s;
; the soft-float configurations call __addsf3 (for the fadd with 0.0), then
; __mulsf3 and __subsf3.
define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fmsub_s_contract:
; CHECKIF:       # %bb.0:
; CHECKIF-NEXT:    fmv.w.x fa5, zero
; CHECKIF-NEXT:    fadd.s fa5, fa2, fa5
; CHECKIF-NEXT:    fmsub.s fa0, fa0, fa1, fa5
; CHECKIF-NEXT:    ret
;
; CHECKIZFINX-LABEL: fmsub_s_contract:
; CHECKIZFINX:       # %bb.0:
; CHECKIZFINX-NEXT:    fadd.s a2, a2, zero
; CHECKIZFINX-NEXT:    fmsub.s a0, a0, a1, a2
; CHECKIZFINX-NEXT:    ret
;
; RV32I-LABEL: fmsub_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a1
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __mulsf3
; RV32I-NEXT:    mv a1, s2
; RV32I-NEXT:    call __subsf3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmsub_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a1
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __mulsf3
; RV64I-NEXT:    mv a1, s2
; RV64I-NEXT:    call __subsf3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %c_ = fadd float 0.0, %c ; avoid negation using xor
  %1 = fmul contract float %a, %b
  %2 = fsub contract float %1, %c_
  ret float %2
}
|
|
|
|
; Computes -(a_ * b_) - c_ as a contract fmul, a plain fneg of the product, and
; a contract fsub. The F and Zfinx configurations are expected to fuse the
; sequence into fnmadd.s. The soft-float configurations call __mulsf3, flip the
; sign bit of the product with an integer xor against 0x80000000 (lui 524288),
; and then call __subsf3.
define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fnmadd_s_contract:
; CHECKIF:       # %bb.0:
; CHECKIF-NEXT:    fmv.w.x fa5, zero
; CHECKIF-NEXT:    fadd.s fa4, fa0, fa5
; CHECKIF-NEXT:    fadd.s fa3, fa1, fa5
; CHECKIF-NEXT:    fadd.s fa5, fa2, fa5
; CHECKIF-NEXT:    fnmadd.s fa0, fa4, fa3, fa5
; CHECKIF-NEXT:    ret
;
; CHECKIZFINX-LABEL: fnmadd_s_contract:
; CHECKIZFINX:       # %bb.0:
; CHECKIZFINX-NEXT:    fadd.s a0, a0, zero
; CHECKIZFINX-NEXT:    fadd.s a1, a1, zero
; CHECKIZFINX-NEXT:    fadd.s a2, a2, zero
; CHECKIZFINX-NEXT:    fnmadd.s a0, a0, a1, a2
; CHECKIZFINX-NEXT:    ret
;
; RV32I-LABEL: fnmadd_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    mv a1, s1
; RV32I-NEXT:    call __mulsf3
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __subsf3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmadd_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    mv a1, s1
; RV64I-NEXT:    call __mulsf3
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __subsf3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %a_ = fadd float 0.0, %a ; avoid negation using xor
  %b_ = fadd float 0.0, %b ; avoid negation using xor
  %c_ = fadd float 0.0, %c ; avoid negation using xor
  %1 = fmul contract float %a_, %b_
  %2 = fneg float %1
  %3 = fsub contract float %2, %c_
  ret float %3
}
|
|
|
|
; Computes c - (a_ * b_) as a contract fmul followed by a contract fsub with the
; product as the subtrahend. The F and Zfinx configurations are expected to fuse
; the pair into fnmsub.s. The soft-float configurations call __mulsf3 and then
; __subsf3 with %c as the first argument and the product as the second.
define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
; CHECKIF-LABEL: fnmsub_s_contract:
; CHECKIF:       # %bb.0:
; CHECKIF-NEXT:    fmv.w.x fa5, zero
; CHECKIF-NEXT:    fadd.s fa4, fa0, fa5
; CHECKIF-NEXT:    fadd.s fa5, fa1, fa5
; CHECKIF-NEXT:    fnmsub.s fa0, fa4, fa5, fa2
; CHECKIF-NEXT:    ret
;
; CHECKIZFINX-LABEL: fnmsub_s_contract:
; CHECKIZFINX:       # %bb.0:
; CHECKIZFINX-NEXT:    fadd.s a0, a0, zero
; CHECKIZFINX-NEXT:    fadd.s a1, a1, zero
; CHECKIZFINX-NEXT:    fnmsub.s a0, a0, a1, a2
; CHECKIZFINX-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __mulsf3
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __subsf3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __mulsf3
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __subsf3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %a_ = fadd float 0.0, %a ; avoid negation using xor
  %b_ = fadd float 0.0, %b ; avoid negation using xor
  %1 = fmul contract float %a_, %b_
  %2 = fsub contract float %c, %1
  ret float %2
}
|
|
|
|
; Multiplies %y by copysign(1.0, %x). The F and Zfinx configurations are
; expected to reduce the whole pattern to a single fsgnjx.s. The soft-float
; configurations build the +/-1.0 constant with integer ops (mask the sign bit
; of %x with 0x80000000, then OR in 0x3F800000, the bit pattern of 1.0f) and
; call __mulsf3.
define float @fsgnjx_f32(float %x, float %y) nounwind {
; CHECKIF-LABEL: fsgnjx_f32:
; CHECKIF:       # %bb.0:
; CHECKIF-NEXT:    fsgnjx.s fa0, fa1, fa0
; CHECKIF-NEXT:    ret
;
; CHECKIZFINX-LABEL: fsgnjx_f32:
; CHECKIZFINX:       # %bb.0:
; CHECKIZFINX-NEXT:    fsgnjx.s a0, a1, a0
; CHECKIZFINX-NEXT:    ret
;
; RV32I-LABEL: fsgnjx_f32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    lui a2, 260096
; RV32I-NEXT:    or a0, a0, a2
; RV32I-NEXT:    call __mulsf3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsgnjx_f32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    lui a2, 260096
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    call __mulsf3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %z = call float @llvm.copysign.f32(float 1.0, float %x)
  %mul = fmul float %z, %y
  ret float %mul
}
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; RV32IZFINX: {{.*}}
|
|
; RV64IZFINX: {{.*}}
|