[AMDGPU] Codegen support for v_fmaak_f64/v_fmamk_f64 (#148734)

This commit is contained in:
Stanislav Mekhanoshin 2025-07-14 17:57:06 -07:00 committed by GitHub
parent 2c6771889a
commit cbba8f0acb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 271 additions and 28 deletions

View File

@ -3513,6 +3513,10 @@ static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc) {
? AMDGPU::V_FMAAK_F16_t16
: AMDGPU::V_FMAAK_F16_fake16
: AMDGPU::V_FMAAK_F16;
case AMDGPU::V_FMAC_F64_e32:
case AMDGPU::V_FMAC_F64_e64:
case AMDGPU::V_FMA_F64_e64:
return AMDGPU::V_FMAAK_F64;
default:
llvm_unreachable("invalid instruction");
}
@ -3541,6 +3545,10 @@ static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) {
? AMDGPU::V_FMAMK_F16_t16
: AMDGPU::V_FMAMK_F16_fake16
: AMDGPU::V_FMAMK_F16;
case AMDGPU::V_FMAC_F64_e32:
case AMDGPU::V_FMAC_F64_e64:
case AMDGPU::V_FMA_F64_e64:
return AMDGPU::V_FMAMK_F64;
default:
llvm_unreachable("invalid instruction");
}
@ -3619,7 +3627,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64) {
// Don't fold if we are using source or output modifiers. The new VOP2
// instructions don't have them.
if (hasAnyModifiersSet(UseMI))
@ -3691,7 +3700,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@ -3759,7 +3769,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@ -4080,8 +4091,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel);
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
!IsLegacy &&
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
(!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {

View File

@ -463,6 +463,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
break;
case AMDGPU::V_FMA_F64_e64:
if (ST->hasFmaakFmamkF64Insts())
NewOpcode = AMDGPU::V_FMAAK_F64;
break;
}
}
@ -497,6 +501,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
break;
case AMDGPU::V_FMA_F64_e64:
if (ST->hasFmaakFmamkF64Insts())
NewOpcode = AMDGPU::V_FMAMK_F64;
break;
}
}
@ -961,7 +969,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
(MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
ST->hasFmaakFmamkF64Insts())) {
shrinkMadFma(MI);
continue;
}

View File

@ -256,17 +256,28 @@ define amdgpu_ps <2 x float> @v_lshl_add_u64(i64 %a) {
; No folding into VOP2 promoted to VOP3
define amdgpu_ps <2 x float> @v_fma_f64(double %a, double %b) {
; GCN-LABEL: v_fma_f64:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GCN-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
; GCN-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GCN-NEXT: ; return to shader part epilog
; GCN-SDAG-LABEL: v_fma_f64:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: v_fmaak_f64 v[4:5], v[0:1], v[2:3], lit64(0x4063233333333333)
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-SDAG-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
; GCN-SDAG-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GCN-SDAG-NEXT: ; return to shader part epilog
;
; GCN-GISEL-LABEL: v_fma_f64:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GCN-GISEL-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
; GCN-GISEL-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-GISEL-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GCN-GISEL-NEXT: ; return to shader part epilog
%r1 = call double @llvm.fma.f64(double %a, double %b, double 153.1) nounwind readnone
%r2 = call double @llvm.fma.f64(double %a, double %r1, double 200.1) nounwind readnone
%r3 = call double @llvm.fma.f64(double %r2, double %r1, double 200.1) nounwind readnone

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX942 %s
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1250 -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX1250 %s
---
name: fold_simm_virtual
@ -564,6 +565,144 @@ body: |
...
---
# V_FMAC_F64_e64 whose src0 is an sreg_64 defined by S_MOV_B64_IMM_PSEUDO.
# The gfx1250 checks expect the immediate to be folded, producing V_FMAMK_F64
# with the S_MOV gone; the gfx942 checks expect the input to stay unchanged.
name: fmac_sreg_64_src0_to_fmamk_f64
tracksRegLiveness: true
body: |
bb.0:
; GFX942-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
;
; GFX1250-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...
---
# V_FMAC_F64_e64 next to an S_MOV_B64_IMM_PSEUDO; both run lines share the
# common checks, which expect the input to stay unchanged.
# NOTE(review): %2 (the S_MOV result) is never used by the V_FMAC below; its
# operands are %0, %1, %1. No immediate is therefore available to fold, so this
# case does not appear to exercise the src1 fold its name describes. Presumably
# src1 was meant to be %2; confirm and regenerate the checks if so.
name: fmac_sreg_64_src1_to_fmamk_f64
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: fmac_sreg_64_src1_to_fmamk_f64
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
; GCN-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...
---
# V_FMAC_F64_e64 whose src2 (the addend) is a VGPR pair defined by
# V_MOV_B64_PSEUDO. The gfx1250 checks expect the immediate to be folded,
# producing V_FMAAK_F64 with the V_MOV gone; the gfx942 checks expect the input
# to stay unchanged.
name: fmac_vreg_64_to_fmaak_f64
tracksRegLiveness: true
body: |
bb.0:
; GFX942-LABEL: name: fmac_vreg_64_to_fmaak_f64
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
;
; GFX1250-LABEL: name: fmac_vreg_64_to_fmaak_f64
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...
---
# V_FMA_F64_e64 whose src0 is an sreg_64 defined by S_MOV_B64_IMM_PSEUDO.
# The gfx1250 checks expect the immediate to be folded, producing V_FMAMK_F64
# with the S_MOV gone; the gfx942 checks expect the input to stay unchanged.
name: fma_sreg_64_src0_to_fmamk_f64
tracksRegLiveness: true
body: |
bb.0:
; GFX942-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
;
; GFX1250-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
%3:vreg_64_align2 = V_FMA_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...
---
# V_FMA_F64_e64 next to an S_MOV_B64_IMM_PSEUDO; both run lines share the
# common checks, which expect the input to stay unchanged.
# NOTE(review): %2 (the S_MOV result) is never used by the V_FMA below; its
# operands are %0, %1, %1. No immediate is therefore available to fold, so this
# case does not appear to exercise the src1 fold its name describes. Presumably
# src1 was meant to be %2; confirm and regenerate the checks if so.
name: fma_sreg_64_src1_to_fmamk_f64
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: fma_sreg_64_src1_to_fmamk_f64
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
; GCN-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
%3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...
---
# V_FMA_F64_e64 whose src2 (the addend) is a VGPR pair defined by
# V_MOV_B64_PSEUDO. The gfx1250 checks expect the immediate to be folded,
# producing V_FMAAK_F64 with the V_MOV gone; the gfx942 checks expect the input
# to stay unchanged.
name: fma_vreg_64_to_fmaak_f64
tracksRegLiveness: true
body: |
bb.0:
; GFX942-LABEL: name: fma_vreg_64_to_fmaak_f64
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
;
; GFX1250-LABEL: name: fma_vreg_64_to_fmaak_f64
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
%3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...
---
name: fold_v_mov_b32_e32_literal_to_agpr
body: |

View File

@ -0,0 +1,62 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass si-shrink-instructions %s -o - | FileCheck %s -check-prefix=GFX1250
---
# V_FMA_F64_e64 with a 64-bit immediate in src0 and VGPR pairs in src1/src2.
# The checks expect si-shrink-instructions to produce V_FMAMK_F64 with the
# immediate as the middle operand and $vgpr0_vgpr1 moved to the first operand.
name: fma_cvv_f64
body: |
bb.0:
; GFX1250-LABEL: name: fma_cvv_f64
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr2_vgpr3 = IMPLICIT_DEF
$vgpr4_vgpr5 = V_FMA_F64_e64 0, 4638355772470722560, 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit $vgpr4_vgpr5
...
---
# V_FMA_F64_e64 with a 64-bit immediate in src1 and VGPR pairs in src0/src2.
# The checks expect si-shrink-instructions to produce V_FMAMK_F64 with the
# immediate kept as the middle operand.
name: fma_vcv_f64
body: |
bb.0:
; GFX1250-LABEL: name: fma_vcv_f64
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr2_vgpr3 = IMPLICIT_DEF
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, 4638355772470722560, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit $vgpr4_vgpr5
...
---
# V_FMA_F64_e64 with VGPR pairs in src0/src1 and a 64-bit immediate in src2
# (the addend). The checks expect si-shrink-instructions to produce
# V_FMAAK_F64 with the immediate as the last source operand.
name: fma_vvc_f64
body: |
bb.0:
; GFX1250-LABEL: name: fma_vvc_f64
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr2_vgpr3 = IMPLICIT_DEF
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit $vgpr4_vgpr5
...
---
# V_FMA_F64_e64 with VGPR pairs in src0/src1 and a 64-bit immediate in src2.
# The checks expect si-shrink-instructions to produce V_FMAAK_F64.
# NOTE(review): the input and expected output here are identical to
# fma_vvc_f64; src1 is $vgpr2_vgpr3, not an SGPR. The "vsc" name suggests src1
# was meant to be a scalar register, so as written this case adds no coverage
# beyond fma_vvc_f64. Confirm the intended operand and regenerate the checks.
name: fma_vsc_f64
body: |
bb.0:
; GFX1250-LABEL: name: fma_vsc_f64
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr2_vgpr3 = IMPLICIT_DEF
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit $vgpr4_vgpr5
...

View File

@ -1,8 +1,10 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefixes=GCN,GFX90A %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s --passes=two-address-instruction -verify-each -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -run-pass twoaddressinstruction -o - | FileCheck -check-prefixes=GCN,GFX1250 %s
# GCN-LABEL: name: test_fmamk_reg_imm_f64
# GCN: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
# GFX1250: V_FMAMK_F64 killed %0, 4607182418800017408, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f64
registers:
@ -21,7 +23,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_imm_reg_f64
# GCN: V_FMA_F64_e64 0, %2, 0, killed %0.sub0_sub1, 0, killed %1, 0, 0, implicit $mode, implicit $exec
# GFX90A: V_FMA_F64_e64 0, %2, 0, killed %0.sub0_sub1, 0, killed %1, 0, 0, implicit $mode, implicit $exec
# GFX1250: V_FMAMK_F64 killed %0.sub0_sub1, 4607182418800017408, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_imm_reg_f64
registers:
@ -40,7 +43,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_f64
# GCN: V_FMA_F64_e64 0, killed %0.sub0_sub1, 0, %0.sub2_sub3, 0, %1, 0, 0, implicit $mode, implicit $exec
# GFX90A: V_FMA_F64_e64 0, killed %0.sub0_sub1, 0, %0.sub2_sub3, 0, %1, 0, 0, implicit $mode, implicit $exec
# GFX1250: V_FMAAK_F64 killed %0.sub0_sub1, %0.sub2_sub3, 4607182418800017408, implicit $mode, implicit $exec
---
name: test_fmaak_f64
registers:
@ -57,7 +61,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_sgpr_src0_f64
# GCN: V_FMA_F64_e64 0, killed %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
# GFX1250: V_FMAMK_F64 killed %0, 4607182418800017408, %2, implicit $mode, implicit $exec
---
name: test_fmaak_sgpr_src0_f64
@ -77,7 +82,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_inlineimm_src0_f64
# GCN: V_FMA_F64_e64 0, 4611686018427387904, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
# GFX90A: V_FMA_F64_e64 0, 4611686018427387904, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
# GFX1250: V_FMAMK_F64 4611686018427387904, 4607182418800017408, %1, implicit $mode, implicit $exec
---
name: test_fmaak_inlineimm_src0_f64
@ -95,7 +101,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_otherimm_src0_f64
# GCN: V_FMAC_F64_e32 4636737291354636288, %0, %2, implicit $mode, implicit $exec
# GFX90A: V_FMAC_F64_e32 4636737291354636288, %0, %2, implicit $mode, implicit $exec
# GFX1250: V_FMAMK_F64 %0, 4636737291354636288, %1, implicit $mode, implicit $exec
---
name: test_fmaak_otherimm_src0_f64
@ -134,7 +141,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_reg_unfoldable_literal_src0_f64
# GCN: V_FMA_F64_e64 0, %2, 0, killed %0, 0, killed %1, 0, 0, implicit $mode, implicit $exec
# GFX90A: V_FMA_F64_e64 0, %2, 0, killed %0, 0, killed %1, 0, 0, implicit $mode, implicit $exec
# GFX1250: V_FMAMK_F64 killed %0, 123456, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_unfoldable_literal_src0_f64
registers:
@ -153,7 +161,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_reg_unfoldable_literal_src1_f64
# GCN: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
# GFX1250: V_FMAMK_F64 killed %0, 123456, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_unfoldable_literal_src1_f64
registers:
@ -172,7 +181,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_reg_unfoldable_literal_src2_f64
# GCN: V_FMA_F64_e64 0, killed %0, 0, killed %1, 0, %2, 0, 0, implicit $mode, implicit $exec
# GFX90A: V_FMA_F64_e64 0, killed %0, 0, killed %1, 0, %2, 0, 0, implicit $mode, implicit $exec
# GFX1250: V_FMAAK_F64 killed %0, killed %1, 123456, implicit $mode, implicit $exec
---
name: test_fmaak_reg_unfoldable_literal_src2_f64
registers: