AMDGPU: Handle V->A MFMA copy from case with immediate src2

Handle a special case for copies from AGPR to VGPR on the MFMA inputs.
If the "input" is really a subregister def, we will not see the
usual copy to VGPR for src2, only the read of the subregister def.
Not sure if this pattern appears in practice.
This commit is contained in:
Matt Arsenault 2025-08-11 18:22:09 +09:00 committed by Matt Arsenault
parent 5d8dc9b800
commit 579e971aab
2 changed files with 8 additions and 7 deletions

View File

@ -377,13 +377,14 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
Register CopyDstReg = UseMI.getOperand(0).getReg();
if (!CopyDstReg.isVirtual())
continue;
for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
if (!CopyUseMO.readsReg())
continue;
for (MachineInstr &CopyUseMI : MRI.use_instructions(CopyDstReg)) {
MachineInstr &CopyUseMI = *CopyUseMO.getParent();
if (isRewriteCandidate(CopyUseMI)) {
const MachineOperand *Op =
CopyUseMI.findRegisterUseOperand(CopyDstReg, /*TRI=*/nullptr);
if (tryReassigningMFMAChain(CopyUseMI, Op->getOperandNo(),
VRM.getPhys(Op->getReg())))
if (tryReassigningMFMAChain(CopyUseMI, CopyUseMO.getOperandNo(),
VRM.getPhys(CopyUseMO.getReg())))
MadeChange = true;
}
}

View File

@ -187,8 +187,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: SI_RETURN
%0:vreg_64_align2 = COPY $vgpr4_vgpr5