
This patch adds transformation of fmul+fadd/fsub chains to fused multiply instructions: * fmul+fadd->fmadd * fmul+fsub->fmsub/fnmsub We also will try to combine these instructions if the fmul has more than one use and cannot be deleted. However, removing the dependence between fmul and fadd can still be profitable, and we rely on machine combiner approximations of scheduling. Differential Revision: https://reviews.llvm.org/D136764
107 lines
5.4 KiB
LLVM
107 lines
5.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs -mcpu=sifive-u74 \
|
|
; RUN: -O1 -riscv-enable-machine-combiner=true \
|
|
; RUN: -stop-after machine-combiner < %s | FileCheck %s
|
|
|
|
; test_reassoc_fadd1: a serial chain of three `nsz reassoc` fadds,
; ((%a0 + %a1) + %a2) + %a3.
; The expected MIR after machine-combiner no longer has that serial shape:
; it adds $f10_d + $f11_d and $f12_d + $f13_d as two independent FADD_D
; instructions and then adds the two partial sums with a third FADD_D.
; All three expected FADD_D instructions keep the `nsz reassoc` flags.
define double @test_reassoc_fadd1(double %a0, double %a1, double %a2, double %a3) {
|
|
; CHECK-LABEL: name: test_reassoc_fadd1
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $f10_d, $f11_d, $f12_d, $f13_d
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f13_d
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f12_d
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $f11_d
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $f10_d
|
|
; CHECK-NEXT: [[FADD_D:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FADD_D [[COPY3]], [[COPY2]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FADD_D1:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FADD_D [[COPY1]], [[COPY]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FADD_D2:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FADD_D killed [[FADD_D]], killed [[FADD_D1]], 7, implicit $frm
|
|
; CHECK-NEXT: $f10_d = COPY [[FADD_D2]]
|
|
; CHECK-NEXT: PseudoRET implicit $f10_d
|
|
; Input IR: each fadd consumes the result of the previous one.
%t0 = fadd nsz reassoc double %a0, %a1
|
|
%t1 = fadd nsz reassoc double %t0, %a2
|
|
%t2 = fadd nsz reassoc double %t1, %a3
|
|
ret double %t2
|
|
}
|
|
|
|
; test_reassoc_fmul1: a serial chain of three `nsz reassoc` fmuls,
; ((%a0 * %a1) * %a2) * %a3.
; The expected MIR after machine-combiner multiplies $f10_d * $f11_d and
; $f12_d * $f13_d as two independent FMUL_D instructions and then multiplies
; the two partial products with a third FMUL_D.
; All three expected FMUL_D instructions keep the `nsz reassoc` flags.
define double @test_reassoc_fmul1(double %a0, double %a1, double %a2, double %a3) {
|
|
; CHECK-LABEL: name: test_reassoc_fmul1
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $f10_d, $f11_d, $f12_d, $f13_d
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f13_d
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f12_d
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $f11_d
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $f10_d
|
|
; CHECK-NEXT: [[FMUL_D:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FMUL_D [[COPY3]], [[COPY2]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FMUL_D1:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FMUL_D [[COPY1]], [[COPY]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FMUL_D2:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FMUL_D killed [[FMUL_D]], killed [[FMUL_D1]], 7, implicit $frm
|
|
; CHECK-NEXT: $f10_d = COPY [[FMUL_D2]]
|
|
; CHECK-NEXT: PseudoRET implicit $f10_d
|
|
; Input IR: each fmul consumes the result of the previous one.
%t0 = fmul nsz reassoc double %a0, %a1
|
|
%t1 = fmul nsz reassoc double %t0, %a2
|
|
%t2 = fmul nsz reassoc double %t1, %a3
|
|
ret double %t2
|
|
}
|
|
|
|
; Verify flags intersection.
; Same serial fadd chain as a plain reassociation test, but only the middle
; fadd (%t1) carries `contract` in addition to `nsz reassoc`; the first and
; last fadds do not. In the expected MIR the chain is rebuilt as two
; independent FADD_D instructions plus a final FADD_D, and none of the three
; carries `contract` -- only the flags common to the rewritten instructions
; (`nsz reassoc`) remain.
|
|
define double @test_reassoc_flags1(double %a0, double %a1, double %a2, double %a3) {
|
|
; CHECK-LABEL: name: test_reassoc_flags1
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $f10_d, $f11_d, $f12_d, $f13_d
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f13_d
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f12_d
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $f11_d
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $f10_d
|
|
; CHECK-NEXT: [[FADD_D:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FADD_D [[COPY3]], [[COPY2]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FADD_D1:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FADD_D [[COPY1]], [[COPY]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FADD_D2:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FADD_D killed [[FADD_D]], killed [[FADD_D1]], 7, implicit $frm
|
|
; CHECK-NEXT: $f10_d = COPY [[FADD_D2]]
|
|
; CHECK-NEXT: PseudoRET implicit $f10_d
|
|
%t0 = fadd nsz reassoc double %a0, %a1
|
|
; The only instruction in this function with the `contract` flag.
%t1 = fadd contract nsz reassoc double %t0, %a2
|
|
%t2 = fadd nsz reassoc double %t1, %a3
|
|
ret double %t2
|
|
}
|
|
|
|
; Verify flags intersection.
; Here the last two fadds of the chain (%t1 and %t2) both carry `contract`,
; while the first one (%t0) does not. In the expected MIR the FADD_D of
; $f10_d + $f11_d has no `contract`, whereas the FADD_D of $f12_d + $f13_d and
; the final FADD_D that combines the two partial sums both keep `contract`,
; i.e. a flag shared by both rewritten instructions survives the rewrite.
|
|
define double @test_reassoc_flags2(double %a0, double %a1, double %a2, double %a3) {
|
|
; CHECK-LABEL: name: test_reassoc_flags2
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $f10_d, $f11_d, $f12_d, $f13_d
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f13_d
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f12_d
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $f11_d
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $f10_d
|
|
; CHECK-NEXT: [[FADD_D:%[0-9]+]]:fpr64 = nsz reassoc nofpexcept FADD_D [[COPY3]], [[COPY2]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FADD_D1:%[0-9]+]]:fpr64 = nsz contract reassoc nofpexcept FADD_D [[COPY1]], [[COPY]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FADD_D2:%[0-9]+]]:fpr64 = nsz contract reassoc nofpexcept FADD_D killed [[FADD_D]], killed [[FADD_D1]], 7, implicit $frm
|
|
; CHECK-NEXT: $f10_d = COPY [[FADD_D2]]
|
|
; CHECK-NEXT: PseudoRET implicit $f10_d
|
|
; No `contract` on the first fadd of the chain.
%t0 = fadd nsz reassoc double %a0, %a1
|
|
%t1 = fadd contract nsz reassoc double %t0, %a2
|
|
%t2 = fadd contract nsz reassoc double %t1, %a3
|
|
ret double %t2
|
|
}
|
|
|
|
; Verify FRM.
; A `contract` fmul feeds a `contract` fadd, and the fmul result is also used
; a second time by the fdiv. The expected MIR contains a fused FMADD_D of
; $f10_d, $f11_d and $f12_d, and still keeps the FMUL_D because the FDIV_D
; reads it. The FMADD_D is expected to carry the same trailing `7` operand and
; `implicit $frm` use as the FMUL_D.
; NOTE(review): `7` is presumably the rounding-mode field of these
; instructions -- confirm against the RISC-V F/D extension encoding.
|
|
define double @test_fmadd(double %a0, double %a1, double %a2) {
|
|
; CHECK-LABEL: name: test_fmadd
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $f10_d, $f11_d, $f12_d
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f12_d
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f11_d
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $f10_d
|
|
; CHECK-NEXT: [[FMUL_D:%[0-9]+]]:fpr64 = contract nofpexcept FMUL_D [[COPY2]], [[COPY1]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FMADD_D:%[0-9]+]]:fpr64 = contract nofpexcept FMADD_D [[COPY2]], [[COPY1]], [[COPY]], 7, implicit $frm
|
|
; CHECK-NEXT: [[FDIV_D:%[0-9]+]]:fpr64 = nofpexcept FDIV_D killed [[FMADD_D]], [[FMUL_D]], 7, implicit $frm
|
|
; CHECK-NEXT: $f10_d = COPY [[FDIV_D]]
|
|
; CHECK-NEXT: PseudoRET implicit $f10_d
|
|
%t0 = fmul contract double %a0, %a1
|
|
%t1 = fadd contract double %t0, %a2
|
|
; Second use of %t0: the fmul cannot simply be deleted after fusion.
%t2 = fdiv double %t1, %t0
|
|
ret double %t2
|
|
}
|