[AMDGPU] Codegen support for v_fmaak_f64/v_fmamk_f64 (#148734)
This commit is contained in:
parent
2c6771889a
commit
cbba8f0acb
@ -3513,6 +3513,10 @@ static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc) {
|
||||
? AMDGPU::V_FMAAK_F16_t16
|
||||
: AMDGPU::V_FMAAK_F16_fake16
|
||||
: AMDGPU::V_FMAAK_F16;
|
||||
case AMDGPU::V_FMAC_F64_e32:
|
||||
case AMDGPU::V_FMAC_F64_e64:
|
||||
case AMDGPU::V_FMA_F64_e64:
|
||||
return AMDGPU::V_FMAAK_F64;
|
||||
default:
|
||||
llvm_unreachable("invalid instruction");
|
||||
}
|
||||
@ -3541,6 +3545,10 @@ static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) {
|
||||
? AMDGPU::V_FMAMK_F16_t16
|
||||
: AMDGPU::V_FMAMK_F16_fake16
|
||||
: AMDGPU::V_FMAMK_F16;
|
||||
case AMDGPU::V_FMAC_F64_e32:
|
||||
case AMDGPU::V_FMAC_F64_e64:
|
||||
case AMDGPU::V_FMA_F64_e64:
|
||||
return AMDGPU::V_FMAMK_F64;
|
||||
default:
|
||||
llvm_unreachable("invalid instruction");
|
||||
}
|
||||
@ -3619,7 +3627,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
|
||||
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
|
||||
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
|
||||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F64_e64) {
|
||||
// Don't fold if we are using source or output modifiers. The new VOP2
|
||||
// instructions don't have them.
|
||||
if (hasAnyModifiersSet(UseMI))
|
||||
@ -3691,7 +3700,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
|
||||
|
||||
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
|
||||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
|
||||
UseMI.untieRegOperand(
|
||||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
|
||||
|
||||
@ -3759,7 +3769,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
|
||||
|
||||
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
|
||||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
|
||||
UseMI.untieRegOperand(
|
||||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
|
||||
|
||||
@ -4080,8 +4091,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
|
||||
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
|
||||
const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel);
|
||||
|
||||
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
|
||||
!IsLegacy &&
|
||||
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
|
||||
(!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
|
||||
// If we have an SGPR input, we will violate the constant bus restriction.
|
||||
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
|
||||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
|
||||
|
@ -463,6 +463,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
|
||||
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
|
||||
NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
|
||||
break;
|
||||
case AMDGPU::V_FMA_F64_e64:
|
||||
if (ST->hasFmaakFmamkF64Insts())
|
||||
NewOpcode = AMDGPU::V_FMAAK_F64;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -497,6 +501,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
|
||||
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
|
||||
NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
|
||||
break;
|
||||
case AMDGPU::V_FMA_F64_e64:
|
||||
if (ST->hasFmaakFmamkF64Insts())
|
||||
NewOpcode = AMDGPU::V_FMAMK_F64;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -961,7 +969,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
|
||||
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
|
||||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
|
||||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
|
||||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
|
||||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
|
||||
(MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
|
||||
ST->hasFmaakFmamkF64Insts())) {
|
||||
shrinkMadFma(MI);
|
||||
continue;
|
||||
}
|
||||
|
@ -256,17 +256,28 @@ define amdgpu_ps <2 x float> @v_lshl_add_u64(i64 %a) {
|
||||
; No folding into VOP2 promoted to VOP3
|
||||
|
||||
define amdgpu_ps <2 x float> @v_fma_f64(double %a, double %b) {
|
||||
; GCN-LABEL: v_fma_f64:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
|
||||
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GCN-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
|
||||
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
|
||||
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GCN-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
|
||||
; GCN-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
; GCN-SDAG-LABEL: v_fma_f64:
|
||||
; GCN-SDAG: ; %bb.0:
|
||||
; GCN-SDAG-NEXT: v_fmaak_f64 v[4:5], v[0:1], v[2:3], lit64(0x4063233333333333)
|
||||
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
|
||||
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GCN-SDAG-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
|
||||
; GCN-SDAG-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
|
||||
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
|
||||
; GCN-SDAG-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GCN-GISEL-LABEL: v_fma_f64:
|
||||
; GCN-GISEL: ; %bb.0:
|
||||
; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
|
||||
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||
; GCN-GISEL-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
|
||||
; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
|
||||
; GCN-GISEL-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
|
||||
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GCN-GISEL-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
|
||||
; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
|
||||
; GCN-GISEL-NEXT: ; return to shader part epilog
|
||||
%r1 = call double @llvm.fma.f64(double %a, double %b, double 153.1) nounwind readnone
|
||||
%r2 = call double @llvm.fma.f64(double %a, double %r1, double 200.1) nounwind readnone
|
||||
%r3 = call double @llvm.fma.f64(double %r2, double %r1, double 200.1) nounwind readnone
|
||||
|
@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX942 %s
|
||||
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1250 -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX1250 %s
|
||||
|
||||
---
|
||||
name: fold_simm_virtual
|
||||
@ -564,6 +565,144 @@ body: |
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: fmac_sreg_64_src0_to_fmamk_f64
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GFX942-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
|
||||
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
|
||||
; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
|
||||
;
|
||||
; GFX1250-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
|
||||
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
|
||||
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
|
||||
%0:vreg_64_align2 = IMPLICIT_DEF
|
||||
%1:vreg_64_align2 = IMPLICIT_DEF
|
||||
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
|
||||
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN_TO_EPILOG %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fmac_sreg_64_src1_to_fmamk_f64
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GCN-LABEL: name: fmac_sreg_64_src1_to_fmamk_f64
|
||||
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
|
||||
; GCN-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
|
||||
%0:vreg_64_align2 = IMPLICIT_DEF
|
||||
%1:vreg_64_align2 = IMPLICIT_DEF
|
||||
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
|
||||
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN_TO_EPILOG %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fmac_vreg_64_to_fmaak_f64
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GFX942-LABEL: name: fmac_vreg_64_to_fmaak_f64
|
||||
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
|
||||
; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
|
||||
;
|
||||
; GFX1250-LABEL: name: fmac_vreg_64_to_fmaak_f64
|
||||
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
|
||||
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
|
||||
%0:vreg_64_align2 = IMPLICIT_DEF
|
||||
%1:vreg_64_align2 = IMPLICIT_DEF
|
||||
%2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
|
||||
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN_TO_EPILOG %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fma_sreg_64_src0_to_fmamk_f64
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GFX942-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
|
||||
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
|
||||
; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
|
||||
;
|
||||
; GFX1250-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
|
||||
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
|
||||
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
|
||||
%0:vreg_64_align2 = IMPLICIT_DEF
|
||||
%1:vreg_64_align2 = IMPLICIT_DEF
|
||||
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
|
||||
%3:vreg_64_align2 = V_FMA_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN_TO_EPILOG %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fma_sreg_64_src1_to_fmamk_f64
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GCN-LABEL: name: fma_sreg_64_src1_to_fmamk_f64
|
||||
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
|
||||
; GCN-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
|
||||
%0:vreg_64_align2 = IMPLICIT_DEF
|
||||
%1:vreg_64_align2 = IMPLICIT_DEF
|
||||
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
|
||||
%3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN_TO_EPILOG %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fma_vreg_64_to_fmaak_f64
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GFX942-LABEL: name: fma_vreg_64_to_fmaak_f64
|
||||
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
|
||||
; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
|
||||
;
|
||||
; GFX1250-LABEL: name: fma_vreg_64_to_fmaak_f64
|
||||
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
|
||||
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
|
||||
%0:vreg_64_align2 = IMPLICIT_DEF
|
||||
%1:vreg_64_align2 = IMPLICIT_DEF
|
||||
%2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
|
||||
%3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN_TO_EPILOG %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_v_mov_b32_e32_literal_to_agpr
|
||||
body: |
|
||||
|
62
llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir
Normal file
62
llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir
Normal file
@ -0,0 +1,62 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass si-shrink-instructions %s -o - | FileCheck %s -check-prefix=GFX1250
|
||||
|
||||
---
|
||||
name: fma_cvv_f64
|
||||
body: |
|
||||
bb.0:
|
||||
; GFX1250-LABEL: name: fma_cvv_f64
|
||||
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
|
||||
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
|
||||
$vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2_vgpr3 = IMPLICIT_DEF
|
||||
$vgpr4_vgpr5 = V_FMA_F64_e64 0, 4638355772470722560, 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN implicit $vgpr4_vgpr5
|
||||
...
|
||||
|
||||
---
|
||||
name: fma_vcv_f64
|
||||
body: |
|
||||
bb.0:
|
||||
; GFX1250-LABEL: name: fma_vcv_f64
|
||||
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
|
||||
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
|
||||
$vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2_vgpr3 = IMPLICIT_DEF
|
||||
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, 4638355772470722560, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN implicit $vgpr4_vgpr5
|
||||
...
|
||||
|
||||
---
|
||||
name: fma_vvc_f64
|
||||
body: |
|
||||
bb.0:
|
||||
; GFX1250-LABEL: name: fma_vvc_f64
|
||||
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
|
||||
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
|
||||
$vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2_vgpr3 = IMPLICIT_DEF
|
||||
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN implicit $vgpr4_vgpr5
|
||||
...
|
||||
|
||||
---
|
||||
name: fma_vsc_f64
|
||||
body: |
|
||||
bb.0:
|
||||
; GFX1250-LABEL: name: fma_vsc_f64
|
||||
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
|
||||
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
|
||||
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
|
||||
$vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2_vgpr3 = IMPLICIT_DEF
|
||||
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
|
||||
SI_RETURN implicit $vgpr4_vgpr5
|
||||
...
|
@ -1,8 +1,10 @@
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefixes=GCN,GFX90A %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s --passes=two-address-instruction -verify-each -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -run-pass twoaddressinstruction -o - | FileCheck -check-prefixes=GCN,GFX1250 %s
|
||||
|
||||
# GCN-LABEL: name: test_fmamk_reg_imm_f64
|
||||
# GCN: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAMK_F64 killed %0, 4607182418800017408, killed %1, implicit $mode, implicit $exec
|
||||
---
|
||||
name: test_fmamk_reg_imm_f64
|
||||
registers:
|
||||
@ -21,7 +23,8 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: test_fmamk_imm_reg_f64
|
||||
# GCN: V_FMA_F64_e64 0, %2, 0, killed %0.sub0_sub1, 0, killed %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMA_F64_e64 0, %2, 0, killed %0.sub0_sub1, 0, killed %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAMK_F64 killed %0.sub0_sub1, 4607182418800017408, killed %1, implicit $mode, implicit $exec
|
||||
---
|
||||
name: test_fmamk_imm_reg_f64
|
||||
registers:
|
||||
@ -40,7 +43,8 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: test_fmaak_f64
|
||||
# GCN: V_FMA_F64_e64 0, killed %0.sub0_sub1, 0, %0.sub2_sub3, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMA_F64_e64 0, killed %0.sub0_sub1, 0, %0.sub2_sub3, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAAK_F64 killed %0.sub0_sub1, %0.sub2_sub3, 4607182418800017408, implicit $mode, implicit $exec
|
||||
---
|
||||
name: test_fmaak_f64
|
||||
registers:
|
||||
@ -57,7 +61,8 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: test_fmaak_sgpr_src0_f64
|
||||
# GCN: V_FMA_F64_e64 0, killed %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAMK_F64 killed %0, 4607182418800017408, %2, implicit $mode, implicit $exec
|
||||
|
||||
---
|
||||
name: test_fmaak_sgpr_src0_f64
|
||||
@ -77,7 +82,8 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: test_fmaak_inlineimm_src0_f64
|
||||
# GCN: V_FMA_F64_e64 0, 4611686018427387904, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMA_F64_e64 0, 4611686018427387904, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAMK_F64 4611686018427387904, 4607182418800017408, %1, implicit $mode, implicit $exec
|
||||
|
||||
---
|
||||
name: test_fmaak_inlineimm_src0_f64
|
||||
@ -95,7 +101,8 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: test_fmaak_otherimm_src0_f64
|
||||
# GCN: V_FMAC_F64_e32 4636737291354636288, %0, %2, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMAC_F64_e32 4636737291354636288, %0, %2, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAMK_F64 %0, 4636737291354636288, %1, implicit $mode, implicit $exec
|
||||
|
||||
---
|
||||
name: test_fmaak_otherimm_src0_f64
|
||||
@ -134,7 +141,8 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: test_fmamk_reg_unfoldable_literal_src0_f64
|
||||
# GCN: V_FMA_F64_e64 0, %2, 0, killed %0, 0, killed %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMA_F64_e64 0, %2, 0, killed %0, 0, killed %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAMK_F64 killed %0, 123456, killed %1, implicit $mode, implicit $exec
|
||||
---
|
||||
name: test_fmamk_reg_unfoldable_literal_src0_f64
|
||||
registers:
|
||||
@ -153,7 +161,8 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: test_fmamk_reg_unfoldable_literal_src1_f64
|
||||
# GCN: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAMK_F64 killed %0, 123456, killed %1, implicit $mode, implicit $exec
|
||||
---
|
||||
name: test_fmamk_reg_unfoldable_literal_src1_f64
|
||||
registers:
|
||||
@ -172,7 +181,8 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: test_fmaak_reg_unfoldable_literal_src2_f64
|
||||
# GCN: V_FMA_F64_e64 0, killed %0, 0, killed %1, 0, %2, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX90A: V_FMA_F64_e64 0, killed %0, 0, killed %1, 0, %2, 0, 0, implicit $mode, implicit $exec
|
||||
# GFX1250: V_FMAAK_F64 killed %0, killed %1, 123456, implicit $mode, implicit $exec
|
||||
---
|
||||
name: test_fmaak_reg_unfoldable_literal_src2_f64
|
||||
registers:
|
||||
|
Loading…
x
Reference in New Issue
Block a user