AMDGPU: Handle rewriting VGPR MFMA to AGPR with subregister copies (#153019)
This should address the case where the result isn't fully used, resulting in partial copy bundles from the MFMA result.
This commit is contained in:
parent
fd28257195
commit
eefad7438c
@ -14,8 +14,6 @@
|
||||
/// MFMA opcode.
|
||||
///
|
||||
/// TODO:
|
||||
/// - Handle SplitKit partial copy bundles, and not just full copy instructions
|
||||
///
|
||||
/// - Update LiveIntervals incrementally instead of recomputing from scratch
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -37,6 +35,7 @@ using namespace llvm;
|
||||
namespace {
|
||||
|
||||
class AMDGPURewriteAGPRCopyMFMAImpl {
|
||||
MachineFunction &MF;
|
||||
const GCNSubtarget &ST;
|
||||
const SIInstrInfo &TII;
|
||||
const SIRegisterInfo &TRI;
|
||||
@ -53,7 +52,7 @@ public:
|
||||
AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
|
||||
LiveRegMatrix &LRM, LiveIntervals &LIS,
|
||||
const RegisterClassInfo &RegClassInfo)
|
||||
: ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
|
||||
: MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
|
||||
TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
|
||||
LIS(LIS), RegClassInfo(RegClassInfo) {}
|
||||
|
||||
@ -71,26 +70,28 @@ public:
|
||||
///
|
||||
/// \p RewriteRegs will accumulate the set of registers used by those MFMAs
|
||||
/// that need to have the register classes adjusted.
|
||||
const TargetRegisterClass *recomputeRegClassExceptRewritable(
|
||||
Register Reg, const TargetRegisterClass *OldRC,
|
||||
const TargetRegisterClass *NewRC,
|
||||
SmallVectorImpl<MachineInstr *> &RewriteCandidates,
|
||||
bool recomputeRegClassExceptRewritable(
|
||||
Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
|
||||
SmallSetVector<Register, 4> &RewriteRegs) const;
|
||||
|
||||
bool run(MachineFunction &MF) const;
|
||||
};
|
||||
|
||||
const TargetRegisterClass *
|
||||
AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
|
||||
Register StartReg, const TargetRegisterClass *OldRC,
|
||||
const TargetRegisterClass *NewRC,
|
||||
SmallVectorImpl<MachineInstr *> &RewriteCandidates,
|
||||
bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
|
||||
Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
|
||||
SmallSetVector<Register, 4> &RewriteRegs) const {
|
||||
SmallVector<Register, 8> Worklist = {StartReg};
|
||||
|
||||
// Recursively visit all transitive MFMA users
|
||||
while (!Worklist.empty()) {
|
||||
Register Reg = Worklist.pop_back_val();
|
||||
const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);
|
||||
|
||||
// Inflate to the equivalent AV_* class.
|
||||
const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(OldRC, MF);
|
||||
if (OldRC == NewRC)
|
||||
return false;
|
||||
|
||||
// Accumulate constraints from all uses.
|
||||
for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
|
||||
// Apply the effect of the given operand to NewRC.
|
||||
@ -101,23 +102,40 @@ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
|
||||
// either AGPR or VGPR in src0/src1, so don't bother checking the
|
||||
// constraint effects of the individual operands.
|
||||
if (isRewriteCandidate(*MI)) {
|
||||
for (AMDGPU::OpName OpName :
|
||||
{AMDGPU::OpName::vdst, AMDGPU::OpName::src2}) {
|
||||
const MachineOperand *Op = TII.getNamedOperand(*MI, OpName);
|
||||
const MachineOperand *VDst =
|
||||
TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
|
||||
const MachineOperand *Src2 =
|
||||
TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
|
||||
for (const MachineOperand *Op : {VDst, Src2}) {
|
||||
if (!Op->isReg())
|
||||
continue;
|
||||
|
||||
Register OtherReg = Op->getReg();
|
||||
if (OtherReg != Reg) {
|
||||
if (RewriteRegs.insert(OtherReg))
|
||||
Worklist.push_back(OtherReg);
|
||||
}
|
||||
if (OtherReg.isPhysical())
|
||||
return false;
|
||||
|
||||
if (OtherReg != Reg && RewriteRegs.insert(OtherReg))
|
||||
Worklist.push_back(OtherReg);
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Ignoring effects of " << *MI);
|
||||
if (!is_contained(RewriteCandidates, MI)) {
|
||||
LLVM_DEBUG({
|
||||
Register VDstPhysReg = VRM.getPhys(VDst->getReg());
|
||||
dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
|
||||
<< " Dst=[" << printReg(VDst->getReg()) << " => "
|
||||
<< printReg(VDstPhysReg, &TRI);
|
||||
|
||||
if (Src2->isReg()) {
|
||||
Register Src2PhysReg = VRM.getPhys(Src2->getReg());
|
||||
dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
|
||||
<< printReg(Src2PhysReg, &TRI);
|
||||
}
|
||||
|
||||
dbgs() << "]: " << MI;
|
||||
});
|
||||
|
||||
if (!is_contained(RewriteCandidates, MI))
|
||||
RewriteCandidates.push_back(MI);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
@ -126,13 +144,14 @@ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
|
||||
NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);
|
||||
if (!NewRC || NewRC == OldRC) {
|
||||
LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
|
||||
<< " cannot be reassigned to AGPR: " << *MI);
|
||||
return nullptr;
|
||||
<< " cannot be reassigned to "
|
||||
<< TRI.getRegClassName(NewRC) << ": " << *MI);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NewRC;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
|
||||
@ -228,10 +247,7 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
|
||||
continue;
|
||||
|
||||
MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
|
||||
|
||||
// TODO: Handle SplitKit produced copy bundles for partially defined
|
||||
// registers.
|
||||
if (!DefMI || !DefMI->isFullCopy())
|
||||
if (!DefMI || !DefMI->isCopy())
|
||||
continue;
|
||||
|
||||
Register MFMADstReg = DefMI->getOperand(1).getReg();
|
||||
@ -244,34 +260,6 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
|
||||
if (!MFMA || !isRewriteCandidate(*MFMA))
|
||||
continue;
|
||||
|
||||
MachineOperand *Src2 = TII.getNamedOperand(*MFMA, AMDGPU::OpName::src2);
|
||||
Register Src2Reg;
|
||||
if (Src2->isReg()) {
|
||||
Src2Reg = Src2->getReg();
|
||||
if (!Src2Reg.isVirtual())
|
||||
continue;
|
||||
}
|
||||
|
||||
// FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead
|
||||
// of an AGPR or VGPR subclass, so we can't simply use the result on the
|
||||
// assignment.
|
||||
|
||||
LLVM_DEBUG({
|
||||
dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
|
||||
<< " Dst=[" << printReg(VReg) << " => "
|
||||
<< printReg(PhysReg, &TRI);
|
||||
|
||||
if (Src2Reg) {
|
||||
Register Src2PhysReg = VRM.getPhys(Src2Reg);
|
||||
dbgs() << "], Src2=[" << printReg(Src2Reg, &TRI) << " => "
|
||||
<< printReg(Src2PhysReg, &TRI);
|
||||
}
|
||||
|
||||
dbgs() << "]: " << *MFMA;
|
||||
});
|
||||
|
||||
const TargetRegisterClass *DstVirtRegRC = MRI.getRegClass(MFMADstReg);
|
||||
|
||||
// src2 and dst have the same physical class constraint; try to preserve
|
||||
// the original src2 subclass if one exists.
|
||||
SmallVector<MachineInstr *, 4> RewriteCandidates = {MFMA};
|
||||
@ -290,11 +278,9 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
|
||||
//
|
||||
// Note recomputeRegClassExceptRewritable will consider the constraints of
|
||||
// this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
|
||||
const TargetRegisterClass *DstExceptRC =
|
||||
recomputeRegClassExceptRewritable(MFMADstReg, DstVirtRegRC, VirtRegRC,
|
||||
RewriteCandidates, RewriteRegs);
|
||||
if (!DstExceptRC) {
|
||||
LLVM_DEBUG(dbgs() << "Could not recompute the regclass of "
|
||||
if (!recomputeRegClassExceptRewritable(MFMADstReg, RewriteCandidates,
|
||||
RewriteRegs)) {
|
||||
LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
|
||||
<< printReg(MFMADstReg, &TRI) << '\n');
|
||||
continue;
|
||||
}
|
||||
|
@ -20,10 +20,6 @@
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first() #1 {
|
||||
ret void
|
||||
}
|
||||
@ -420,93 +416,6 @@ body: |
|
||||
|
||||
...
|
||||
|
||||
# Non-mac variant, src2 is the same VGPR, but a different subregister.
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 10
|
||||
sgprForEXECCopy: '$sgpr100_sgpr101'
|
||||
body: |
|
||||
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
|
||||
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
|
||||
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
|
||||
; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
|
||||
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31:0x00000000FFFFFFFF
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
S_NOP 0, implicit-def $agpr0
|
||||
renamable $sgpr0 = S_MOV_B32 0
|
||||
undef %0.sub8:vreg_1024_align2 = V_MOV_B32_e32 0, implicit $exec
|
||||
renamable $sgpr1 = COPY renamable $sgpr0
|
||||
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
|
||||
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
%0.sub9:vreg_1024_align2 = COPY %0.sub8
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
|
||||
undef %0.sub0_sub1:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
%0.sub16_sub17:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
%0.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:vreg_1024_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
; No VGPRs available for %0
|
||||
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# There isn't an assignable AGPR around the first MFMA.
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_chain_no_agprs_first
|
||||
|
@ -1116,11 +1116,8 @@ body: |
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $agpr0_agpr1
|
||||
; CHECK-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 killed $vgpr2_vgpr3, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: renamable $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_mac_e64 killed $vgpr2_vgpr3, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
@ -1202,10 +1199,8 @@ body: |
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $agpr0_agpr1
|
||||
; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: renamable $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY renamable $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; CHECK-NEXT: renamable $agpr16_agpr17 = COPY killed renamable $agpr0_agpr1
|
||||
; CHECK-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 killed $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
@ -1957,3 +1952,89 @@ body: |
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# Non-mac variant, src2 is the same VGPR, but a different subregister.
|
||||
---
|
||||
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 10
|
||||
sgprForEXECCopy: '$sgpr100_sgpr101'
|
||||
body: |
|
||||
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
|
||||
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
|
||||
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
|
||||
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
|
||||
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $agpr0_agpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: renamable $agpr16_agpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31:0x00000000FFFFFFFF
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
S_NOP 0, implicit-def $agpr0
|
||||
renamable $sgpr0 = S_MOV_B32 0
|
||||
undef %0.sub8:vreg_1024_align2 = V_MOV_B32_e32 0, implicit $exec
|
||||
renamable $sgpr1 = COPY renamable $sgpr0
|
||||
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
|
||||
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
%0.sub9:vreg_1024_align2 = COPY %0.sub8
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
|
||||
undef %0.sub0_sub1:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
%0.sub16_sub17:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
%0.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:vreg_1024_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31, 0, 0, 0, implicit $mode, implicit $exec
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
; No VGPRs available for %0
|
||||
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
|
||||
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
|
||||
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
|
||||
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
@ -15,9 +15,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]]
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]].sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
@ -48,9 +48,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub0
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
@ -77,9 +77,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub0
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
@ -106,9 +106,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub1
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
@ -135,9 +135,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]].sub2_sub3:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub0
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
@ -164,9 +164,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub2_sub3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]].sub2_sub3:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub2_sub3
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]].sub2_sub3, 0, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
|
@ -14,9 +14,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]]
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
@ -44,9 +44,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]]
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
@ -74,11 +74,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0_sub1
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_1:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_1]]
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub0_sub1
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_1:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_1]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
@ -145,12 +145,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_1:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_1]].sub0_sub1
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_2:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_1]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_2]]
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_1:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_2:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_2]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
@ -182,13 +182,13 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use0:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_1:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_1]].sub0_sub1
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_2:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_1]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_2]]
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use0:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub0
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_1:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_2:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_1]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[V_MFMA_F64_4X4X4F64_e64_2]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
@ -263,13 +263,13 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use0:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0_sub1
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub2_sub3
|
||||
; CHECK-NEXT: dead %other_use2:vreg_64 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub1_sub2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub1_sub2
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]].sub2_sub3:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use0:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub0_sub1
|
||||
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub2_sub3
|
||||
; CHECK-NEXT: dead %other_use2:vreg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3473417 /* reguse:AReg_64 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], %other_use1, 0, 0, implicit $exec :: (store (s64), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
@ -301,13 +301,13 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use0:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub0_sub1
|
||||
; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub2_sub3:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub2_sub3
|
||||
; CHECK-NEXT: dead %other_use2:vreg_64 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub1_sub2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64 = COPY [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]].sub1_sub2
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]].sub2_sub3:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: dead %other_use0:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub0_sub1
|
||||
; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub2_sub3:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[V_MFMA_F64_4X4X4F64_e64_]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: %other_use1:vreg_64_align2 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub2_sub3
|
||||
; CHECK-NEXT: dead %other_use2:vreg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_64 = COPY [[V_MFMA_F64_4X4X4F64_e64_]].sub1_sub2
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3473417 /* reguse:AReg_64 */, [[COPY3]]
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], %other_use1, 0, 0, implicit $exec :: (store (s64), addrspace 1)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
|
@ -250,29 +250,20 @@ bb:
|
||||
ret void
|
||||
}
|
||||
|
||||
; TODO: Handle rewriting this case
|
||||
define void @test_rewrite_mfma_subreg_extract0(float %arg0, float %arg1, ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-LABEL: test_rewrite_mfma_subreg_extract0:
|
||||
; CHECK: ; %bb.0: ; %bb
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: global_load_dwordx4 v[30:33], v[2:3], off offset:112
|
||||
; CHECK-NEXT: global_load_dwordx4 v[26:29], v[2:3], off offset:96
|
||||
; CHECK-NEXT: global_load_dwordx4 v[22:25], v[2:3], off offset:80
|
||||
; CHECK-NEXT: global_load_dwordx4 v[18:21], v[2:3], off offset:64
|
||||
; CHECK-NEXT: global_load_dwordx4 v[14:17], v[2:3], off offset:48
|
||||
; CHECK-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:32
|
||||
; CHECK-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:16
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
||||
; CHECK-NEXT: global_load_dwordx4 a[28:31], v[2:3], off offset:112
|
||||
; CHECK-NEXT: global_load_dwordx4 a[24:27], v[2:3], off offset:96
|
||||
; CHECK-NEXT: global_load_dwordx4 a[20:23], v[2:3], off offset:80
|
||||
; CHECK-NEXT: global_load_dwordx4 a[16:19], v[2:3], off offset:64
|
||||
; CHECK-NEXT: global_load_dwordx4 a[12:15], v[2:3], off offset:48
|
||||
; CHECK-NEXT: global_load_dwordx4 a[8:11], v[2:3], off offset:32
|
||||
; CHECK-NEXT: global_load_dwordx4 a[4:7], v[2:3], off offset:16
|
||||
; CHECK-NEXT: global_load_dwordx4 a[0:3], v[2:3], off
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 v[2:33], v0, v1, v[2:33]
|
||||
; CHECK-NEXT: s_nop 7
|
||||
; CHECK-NEXT: s_nop 7
|
||||
; CHECK-NEXT: s_nop 1
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a0, v2
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a1, v3
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a3, v5
|
||||
; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; use a[0:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
@ -289,26 +280,18 @@ define void @test_rewrite_mfma_subreg_extract1(float %arg0, float %arg1, ptr add
|
||||
; CHECK-LABEL: test_rewrite_mfma_subreg_extract1:
|
||||
; CHECK: ; %bb.0: ; %bb
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: global_load_dwordx4 v[30:33], v[2:3], off offset:112
|
||||
; CHECK-NEXT: global_load_dwordx4 v[26:29], v[2:3], off offset:96
|
||||
; CHECK-NEXT: global_load_dwordx4 v[22:25], v[2:3], off offset:80
|
||||
; CHECK-NEXT: global_load_dwordx4 v[18:21], v[2:3], off offset:64
|
||||
; CHECK-NEXT: global_load_dwordx4 v[14:17], v[2:3], off offset:48
|
||||
; CHECK-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:32
|
||||
; CHECK-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:16
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
||||
; CHECK-NEXT: global_load_dwordx4 a[28:31], v[2:3], off offset:112
|
||||
; CHECK-NEXT: global_load_dwordx4 a[24:27], v[2:3], off offset:96
|
||||
; CHECK-NEXT: global_load_dwordx4 a[20:23], v[2:3], off offset:80
|
||||
; CHECK-NEXT: global_load_dwordx4 a[16:19], v[2:3], off offset:64
|
||||
; CHECK-NEXT: global_load_dwordx4 a[12:15], v[2:3], off offset:48
|
||||
; CHECK-NEXT: global_load_dwordx4 a[8:11], v[2:3], off offset:32
|
||||
; CHECK-NEXT: global_load_dwordx4 a[4:7], v[2:3], off offset:16
|
||||
; CHECK-NEXT: global_load_dwordx4 a[0:3], v[2:3], off
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 v[2:33], v0, v1, v[2:33]
|
||||
; CHECK-NEXT: s_nop 7
|
||||
; CHECK-NEXT: s_nop 7
|
||||
; CHECK-NEXT: s_nop 1
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a0, v6
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a1, v7
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a2, v8
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a3, v9
|
||||
; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; use a[0:3]
|
||||
; CHECK-NEXT: ; use a[4:7]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
bb:
|
||||
@ -324,24 +307,23 @@ define void @test_rewrite_mfma_subreg_extract2(float %arg0, float %arg1, ptr add
|
||||
; CHECK-LABEL: test_rewrite_mfma_subreg_extract2:
|
||||
; CHECK: ; %bb.0: ; %bb
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: global_load_dwordx4 v[30:33], v[2:3], off offset:112
|
||||
; CHECK-NEXT: global_load_dwordx4 v[26:29], v[2:3], off offset:96
|
||||
; CHECK-NEXT: global_load_dwordx4 v[22:25], v[2:3], off offset:80
|
||||
; CHECK-NEXT: global_load_dwordx4 v[18:21], v[2:3], off offset:64
|
||||
; CHECK-NEXT: global_load_dwordx4 v[14:17], v[2:3], off offset:48
|
||||
; CHECK-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:32
|
||||
; CHECK-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:16
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
||||
; CHECK-NEXT: global_load_dwordx4 a[28:31], v[2:3], off offset:112
|
||||
; CHECK-NEXT: global_load_dwordx4 a[24:27], v[2:3], off offset:96
|
||||
; CHECK-NEXT: global_load_dwordx4 a[20:23], v[2:3], off offset:80
|
||||
; CHECK-NEXT: global_load_dwordx4 a[16:19], v[2:3], off offset:64
|
||||
; CHECK-NEXT: global_load_dwordx4 a[12:15], v[2:3], off offset:48
|
||||
; CHECK-NEXT: global_load_dwordx4 a[8:11], v[2:3], off offset:32
|
||||
; CHECK-NEXT: global_load_dwordx4 a[4:7], v[2:3], off offset:16
|
||||
; CHECK-NEXT: global_load_dwordx4 a[0:3], v[2:3], off
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 v[2:33], v0, v1, v[2:33]
|
||||
; CHECK-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
|
||||
; CHECK-NEXT: s_nop 7
|
||||
; CHECK-NEXT: s_nop 7
|
||||
; CHECK-NEXT: s_nop 1
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a0, v3
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a2, v5
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a3, v6
|
||||
; CHECK-NEXT: v_accvgpr_mov_b32 a0, a1
|
||||
; CHECK-NEXT: v_accvgpr_mov_b32 a1, a2
|
||||
; CHECK-NEXT: v_accvgpr_mov_b32 a2, a3
|
||||
; CHECK-NEXT: v_accvgpr_mov_b32 a3, a4
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; use a[0:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
@ -483,14 +465,9 @@ define void @test_rewrite_mfma_subreg_insert0(float %arg0, float %arg1, ptr addr
|
||||
; CHECK-LABEL: test_rewrite_mfma_subreg_insert0:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
||||
; CHECK-NEXT: global_load_dwordx4 a[0:3], v[2:3], off
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
|
||||
; CHECK-NEXT: s_nop 3
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a2, v2
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a3, v3
|
||||
; CHECK-NEXT: v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[0:3]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; use a[0:7]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
@ -531,12 +508,9 @@ define void @test_rewrite_mfma_subreg_insert2(double %arg0, double %arg1, ptr ad
|
||||
; CHECK-LABEL: test_rewrite_mfma_subreg_insert2:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[4:5], off
|
||||
; CHECK-NEXT: global_load_dwordx2 a[0:1], v[4:5], off
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[4:5]
|
||||
; CHECK-NEXT: s_nop 5
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
|
||||
; CHECK-NEXT: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], v[2:3], a[0:1]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; use a[0:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
|
Loading…
x
Reference in New Issue
Block a user