AMDGPU: Handle V->A MFMA copy from case with immediate src2
Handle a special case for copies from AGPR to VGPR on the MFMA inputs. If the "input" is really a subregister def, we will not see the usual copy to VGPR for src2, only the read of the subregister def. Not sure if this pattern appears in practice.
This commit is contained in:
parent
5d8dc9b800
commit
579e971aab
@ -377,13 +377,14 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
|
||||
Register CopyDstReg = UseMI.getOperand(0).getReg();
|
||||
if (!CopyDstReg.isVirtual())
|
||||
continue;
|
||||
for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
|
||||
if (!CopyUseMO.readsReg())
|
||||
continue;
|
||||
|
||||
for (MachineInstr &CopyUseMI : MRI.use_instructions(CopyDstReg)) {
|
||||
MachineInstr &CopyUseMI = *CopyUseMO.getParent();
|
||||
if (isRewriteCandidate(CopyUseMI)) {
|
||||
const MachineOperand *Op =
|
||||
CopyUseMI.findRegisterUseOperand(CopyDstReg, /*TRI=*/nullptr);
|
||||
if (tryReassigningMFMAChain(CopyUseMI, Op->getOperandNo(),
|
||||
VRM.getPhys(Op->getReg())))
|
||||
if (tryReassigningMFMAChain(CopyUseMI, CopyUseMO.getOperandNo(),
|
||||
VRM.getPhys(CopyUseMO.getReg())))
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -187,8 +187,8 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user