Revert "[RegAlloc] Relax the split constrain on MBB prolog" (#169990)
Reverts llvm/llvm-project#168259 because it breaks the HIP buildbot.
This commit is contained in:
parent
d3762edd5f
commit
3a1079fa25
@ -774,7 +774,8 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
|
||||
// Abort if the spill cannot be inserted at the MBB's start
|
||||
if (((BC.Entry == SpillPlacement::MustSpill) ||
|
||||
(BC.Entry == SpillPlacement::PrefSpill)) &&
|
||||
!SA->canSplitBeforeProlog(BC.Number))
|
||||
SlotIndex::isEarlierInstr(BI.FirstInstr,
|
||||
SA->getFirstSplitPoint(BC.Number)))
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -829,7 +830,11 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
|
||||
BCS[B].Number = Number;
|
||||
|
||||
// Abort if the spill cannot be inserted at the MBB's start
|
||||
if (!SA->canSplitBeforeProlog(Number))
|
||||
MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
|
||||
auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
|
||||
if (FirstNonDebugInstr != MBB->end() &&
|
||||
SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
|
||||
SA->getFirstSplitPoint(Number)))
|
||||
return false;
|
||||
// Interference for the live-in value.
|
||||
if (Intf.first() <= Indexes->getMBBStartIdx(Number))
|
||||
|
||||
@ -147,54 +147,6 @@ InsertPointAnalysis::getLastInsertPointIter(const LiveInterval &CurLI,
|
||||
return LIS.getInstructionFromIndex(LIP);
|
||||
}
|
||||
|
||||
/// Return true if \p CurLI can be split before the prologue of \p MBB.
///
/// Scans \p MBB from its first instruction, ignoring PHIs, position markers,
/// debug instructions and pseudo probes. Reaching an instruction that the
/// target does not report as a basic-block prologue ends the scan with `true`.
/// For each prologue instruction, a virtual-register def whose register class
/// shares a common subclass with the register class of \p CurLI ends the scan
/// with `false`. If the block is exhausted without either event, the result
/// is `true`.
bool InsertPointAnalysis::canSplitBeforeProlog(const LiveInterval &CurLI,
|
||||
const MachineBasicBlock &MBB) {
|
||||
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
|
||||
|
||||
for (auto &MI : MBB) {
|
||||
// These instruction kinds are skipped without affecting the answer.
if (MI.isPHI() || MI.isPosition() || MI.isDebugInstr() ||
|
||||
MI.isPseudoProbe())
|
||||
continue;
|
||||
|
||||
// First instruction that is not part of the prologue: no conflicting
// prologue def was found above it, so the split is allowed.
if (!TII->isBasicBlockPrologue(MI))
|
||||
return true;
|
||||
|
||||
for (auto &MO : MI.operands()) {
|
||||
// Only virtual-register defs are examined.
if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
|
||||
continue;
|
||||
|
||||
// On the AMDGPU target, if an MBB contains an exec mask restore preamble,
|
||||
// SplitEditor may reach a state where it cannot insert a spill instruction
|
||||
// at the beginning of the MBB.
|
||||
// E.g. for the MIR
|
||||
// bb.100:
|
||||
// %1 = S_OR_SAVEEXEC_B64 %2, implicit-def $exec, implicit-def $scc,
|
||||
// implicit $exec
|
||||
// ...
|
||||
// use %1
|
||||
// If the regalloc tries to allocate a virtreg to the physreg already
|
||||
// assigned to virtreg %1 and the physreg is computed as the best
|
||||
// candidate for split, it may insert a COPY instruction.
|
||||
// bb.100:
|
||||
// %1 = S_OR_SAVEEXEC_B64 %2, implicit-def $exec, implicit-def $scc,
|
||||
// implicit $exec
|
||||
// %2 = COPY %orig
|
||||
// ...
|
||||
// use %1
|
||||
// Thus %1 and %orig still have interference. We may add cost for the
|
||||
// physreg candidate or abandon the candidate.
|
||||
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
|
||||
const TargetRegisterClass *RC = MRI.getRegClass(MO.getReg());
|
||||
const TargetRegisterClass *CurRC = MRI.getRegClass(CurLI.reg());
|
||||
// A shared subclass between the prologue def and CurLI rejects the split.
if (TRI->getCommonSubClass(RC, CurRC))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Split Analysis
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -89,9 +89,6 @@ public:
|
||||
return Res;
|
||||
}
|
||||
|
||||
/// Return true if we can split \p CurLI before \p MBB's prolog.
|
||||
bool canSplitBeforeProlog(const LiveInterval &CurLI,
|
||||
const MachineBasicBlock &MBB);
|
||||
};
|
||||
|
||||
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
|
||||
@ -250,11 +247,6 @@ public:
|
||||
/// Return the first insert point that the insert point analysis reports for
/// the basic block numbered \p Num.
SlotIndex getFirstSplitPoint(unsigned Num) {
|
||||
return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num));
|
||||
}
|
||||
|
||||
/// Return true if the current live interval can be split before the prolog
/// of the basic block numbered \p Num. Looks the block up by number and
/// forwards to the insert point analysis.
bool canSplitBeforeProlog(unsigned Num) {
|
||||
MachineBasicBlock *BB = MF.getBlockNumbered(Num);
|
||||
return IPA.canSplitBeforeProlog(*CurLI, *BB);
|
||||
}
|
||||
};
|
||||
|
||||
/// SplitEditor - Edit machine code and LiveIntervals for live range
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -47208,32 +47208,33 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:104
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v56, v2
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v43, v3
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v3, v4
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v9
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v4, v5
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v47, v6
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v6, v7
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v2, v8
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v13
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v2, v8
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v11
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v46, v10
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v5, v11
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v8, v12
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v45, v14
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v15
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v13
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v12, v16
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v13, v17
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v44, v18
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v15
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v10, v19
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v17, v20
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v11, v21
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v14, v22
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v43, v22
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v19, v23
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v23, v24
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v9, v25
|
||||
@ -47242,44 +47243,44 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v18, v28
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v7, v29
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v30
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
|
||||
; SI-NEXT: s_waitcnt vmcnt(14)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v22, v40
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v20, v57
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v57, v58
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v40, v59
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v15, v60
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v61, v61
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v14, v61
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v62, v62
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v30, v63
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v16, v33
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v63, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v49, v49
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v61, v49
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v29, v50
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v35, v53
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v33, v55
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v58, v31
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v49, v31
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v28, v32
|
||||
; SI-NEXT: s_waitcnt vmcnt(13)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v50, v34
|
||||
; SI-NEXT: s_waitcnt vmcnt(12)
|
||||
; SI-NEXT: s_waitcnt vmcnt(13)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v36, v36
|
||||
; SI-NEXT: s_waitcnt vmcnt(11)
|
||||
; SI-NEXT: s_waitcnt vmcnt(12)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v60, v37
|
||||
; SI-NEXT: s_waitcnt vmcnt(10)
|
||||
; SI-NEXT: s_waitcnt vmcnt(11)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v27, v38
|
||||
; SI-NEXT: s_waitcnt vmcnt(9)
|
||||
; SI-NEXT: s_waitcnt vmcnt(10)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v53, v39
|
||||
; SI-NEXT: s_waitcnt vmcnt(8)
|
||||
; SI-NEXT: s_waitcnt vmcnt(9)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v31, v48
|
||||
; SI-NEXT: s_waitcnt vmcnt(7)
|
||||
; SI-NEXT: s_waitcnt vmcnt(8)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v39, v51
|
||||
; SI-NEXT: s_waitcnt vmcnt(6)
|
||||
; SI-NEXT: s_waitcnt vmcnt(7)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v26, v52
|
||||
; SI-NEXT: s_waitcnt vmcnt(5)
|
||||
; SI-NEXT: s_waitcnt vmcnt(6)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v34, v54
|
||||
; SI-NEXT: s_waitcnt vmcnt(4)
|
||||
; SI-NEXT: s_waitcnt vmcnt(5)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v32, v41
|
||||
; SI-NEXT: s_waitcnt vmcnt(3)
|
||||
; SI-NEXT: s_waitcnt vmcnt(4)
|
||||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v42
|
||||
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
|
||||
@ -47289,34 +47290,47 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_mov_b32_e32 v54, v9
|
||||
; SI-NEXT: v_mov_b32_e32 v55, v11
|
||||
; SI-NEXT: v_mov_b32_e32 v41, v13
|
||||
; SI-NEXT: v_mov_b32_e32 v48, v4
|
||||
; SI-NEXT: v_mov_b32_e32 v4, v3
|
||||
; SI-NEXT: v_mov_b32_e32 v3, v43
|
||||
; SI-NEXT: v_mov_b32_e32 v48, v5
|
||||
; SI-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: s_cbranch_execz .LBB58_2
|
||||
; SI-NEXT: ; %bb.1: ; %cmp.true
|
||||
; SI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
|
||||
; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
|
||||
; SI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v37, v56
|
||||
; SI-NEXT: v_mov_b32_e32 v7, v39
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v39, v47
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v48, v48
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v30, v30
|
||||
; SI-NEXT: v_add_f32_e32 v37, 0x38000000, v37
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v37
|
||||
; SI-NEXT: v_add_f32_e32 v39, 0x38000000, v39
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v30, v30
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v47, 16, v37
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v39
|
||||
; SI-NEXT: v_add_f32_e32 v39, 0x38000000, v48
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v39, v39
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_add_f32_e32 v30, 0x38000000, v30
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v27, v27
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v11, v33
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v58, 16, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v25
|
||||
; SI-NEXT: v_add_f32_e32 v27, 0x38000000, v27
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v13, v35
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v16, v16
|
||||
; SI-NEXT: v_add_f32_e32 v27, 0x38000000, v27
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v14, v14
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v15, v15
|
||||
; SI-NEXT: v_add_f32_e32 v11, 0x38000000, v11
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v33, v11
|
||||
; SI-NEXT: v_add_f32_e32 v13, 0x38000000, v13
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
|
||||
; SI-NEXT: v_add_f32_e32 v16, 0x38000000, v16
|
||||
; SI-NEXT: v_add_f32_e32 v14, 0x38000000, v14
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v16, v16
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v14, v14
|
||||
; SI-NEXT: v_add_f32_e32 v15, 0x38000000, v15
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v15, v15
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v20, v20
|
||||
@ -47335,7 +47349,6 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v18, v18
|
||||
; SI-NEXT: v_add_f32_e32 v24, 0x38000000, v24
|
||||
; SI-NEXT: v_add_f32_e32 v23, 0x38000000, v23
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v24, v24
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v23, v23
|
||||
@ -47354,50 +47367,41 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_f32_e32 v8, 0x38000000, v8
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v8, v8
|
||||
; SI-NEXT: v_add_f32_e32 v5, 0x38000000, v5
|
||||
; SI-NEXT: v_add_f32_e32 v2, 0x38000000, v2
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
||||
; SI-NEXT: v_add_f32_e32 v6, 0x38000000, v6
|
||||
; SI-NEXT: v_add_f32_e32 v4, 0x38000000, v4
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
||||
; SI-NEXT: v_add_f32_e32 v3, 0x38000000, v3
|
||||
; SI-NEXT: s_waitcnt vmcnt(1)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v38, v5
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v38, v7
|
||||
; SI-NEXT: v_mov_b32_e32 v7, v39
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v39, v47
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v47, 16, v37
|
||||
; SI-NEXT: v_add_f32_e32 v38, 0x38000000, v38
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v38, v38
|
||||
; SI-NEXT: v_add_f32_e32 v39, 0x38000000, v39
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v39
|
||||
; SI-NEXT: v_add_f32_e32 v39, 0x38000000, v48
|
||||
; SI-NEXT: v_or_b32_e32 v9, v38, v47
|
||||
; SI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_or_b32_e32 v5, v38, v47
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v38, v46
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v46, 16, v37
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v39, v39
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v37, v9
|
||||
; SI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_add_f32_e32 v38, 0x38000000, v38
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v38, v38
|
||||
; SI-NEXT: v_or_b32_e32 v48, v39, v46
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v39, v45
|
||||
; SI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v45, 16, v38
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_add_f32_e32 v37, 0x38000000, v37
|
||||
; SI-NEXT: v_add_f32_e32 v39, 0x38000000, v39
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v37
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v39, v39
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v57, 16, v39
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v39, v41
|
||||
; SI-NEXT: v_add_f32_e32 v39, 0x38000000, v39
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v39, v39
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v37, v9
|
||||
; SI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_add_f32_e32 v37, 0x38000000, v37
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v37
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_waitcnt vmcnt(2)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v38, v9
|
||||
; SI-NEXT: v_or_b32_e32 v9, v37, v45
|
||||
; SI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
|
||||
@ -47408,37 +47412,35 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v37
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_or_b32_e32 v9, v38, v57
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v38, v14
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v38, v43
|
||||
; SI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v43, 16, v37
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v37, v55
|
||||
; SI-NEXT: v_add_f32_e32 v38, 0x38000000, v38
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v38, v38
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_mov_b32_e32 v9, v58
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v58, 16, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v25
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v42, 16, v38
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v38, v54
|
||||
; SI-NEXT: v_or_b32_e32 v41, v39, v43
|
||||
; SI-NEXT: v_add_f32_e32 v37, 0x38000000, v37
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v37
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v38
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v25
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v42, 16, v38
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v38, v54
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v9, v63
|
||||
; SI-NEXT: v_or_b32_e32 v55, v37, v42
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v37, v52
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v38
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v25
|
||||
; SI-NEXT: v_add_f32_e32 v9, 0x38000000, v9
|
||||
; SI-NEXT: v_add_f32_e32 v37, 0x38000000, v37
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v37
|
||||
; SI-NEXT: v_or_b32_e32 v54, v25, v58
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v25, v40
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v40, 16, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v51
|
||||
; SI-NEXT: v_add_f32_e32 v37, 0x38000000, v37
|
||||
; SI-NEXT: v_or_b32_e32 v52, v37, v40
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v25
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v37
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v25
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_or_b32_e32 v52, v37, v40
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v30
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v30, 16, v25
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v25, v62
|
||||
@ -47449,22 +47451,22 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v25
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v29, v49
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v29, v61
|
||||
; SI-NEXT: v_or_b32_e32 v62, v25, v59
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v25, v28
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v28, 16, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v9
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v49
|
||||
; SI-NEXT: v_add_f32_e32 v29, 0x38000000, v29
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v25
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v29, v29
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v25
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_or_b32_e32 v49, v29, v28
|
||||
; SI-NEXT: v_or_b32_e32 v61, v29, v28
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v29, v27
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v27, 16, v25
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v25, v60
|
||||
; SI-NEXT: v_or_b32_e32 v38, v21, v27
|
||||
; SI-NEXT: v_or_b32_e32 v49, v21, v27
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v26
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v26, v7
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v25
|
||||
@ -47478,7 +47480,6 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v25, v32
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v26, 16, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v34
|
||||
; SI-NEXT: v_or_b32_e32 v41, v39, v43
|
||||
; SI-NEXT: v_or_b32_e32 v39, v29, v26
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v25
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v29, v31
|
||||
@ -47492,19 +47493,14 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_or_b32_e32 v34, v21, v25
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v25, v36
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v29, v50
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v9, v63
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v14, v61
|
||||
; SI-NEXT: v_add_f32_e32 v7, 0x38000000, v7
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v25
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v25
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v36, v25
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v29
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v25
|
||||
; SI-NEXT: v_add_f32_e32 v9, 0x38000000, v9
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v63, v9
|
||||
; SI-NEXT: v_add_f32_e32 v14, 0x38000000, v14
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v31
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v61, v14
|
||||
; SI-NEXT: v_or_b32_e32 v53, v7, v21
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v36
|
||||
; SI-NEXT: v_or_b32_e32 v50, v25, v21
|
||||
@ -47512,17 +47508,18 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_or_b32_e32 v35, v13, v21
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v63
|
||||
; SI-NEXT: v_or_b32_e32 v16, v16, v21
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v61
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v14
|
||||
; SI-NEXT: v_or_b32_e32 v15, v15, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v22
|
||||
; SI-NEXT: s_waitcnt vmcnt(2)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v22, 16, v20
|
||||
; SI-NEXT: v_alignbit_b32 v29, v35, v28, 16
|
||||
; SI-NEXT: v_alignbit_b32 v28, v50, v27, 16
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_alignbit_b32 v27, v53, v60, 16
|
||||
; SI-NEXT: v_mov_b32_e32 v60, v37
|
||||
; SI-NEXT: v_alignbit_b32 v26, v34, v26, 16
|
||||
; SI-NEXT: v_add_f32_e32 v5, 0x38000000, v5
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
|
||||
; SI-NEXT: v_alignbit_b32 v28, v50, v27, 16
|
||||
; SI-NEXT: v_or_b32_e32 v22, v21, v22
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v18
|
||||
; SI-NEXT: v_or_b32_e32 v24, v24, v21
|
||||
@ -47543,15 +47540,19 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_alignbit_b32 v46, v5, v45, 16
|
||||
; SI-NEXT: v_alignbit_b32 v45, v1, v57, 16
|
||||
; SI-NEXT: v_alignbit_b32 v44, v10, v43, 16
|
||||
; SI-NEXT: v_alignbit_b32 v14, v19, v42, 16
|
||||
; SI-NEXT: v_alignbit_b32 v43, v19, v42, 16
|
||||
; SI-NEXT: v_alignbit_b32 v21, v24, v58, 16
|
||||
; SI-NEXT: v_mov_b32_e32 v58, v38
|
||||
; SI-NEXT: v_alignbit_b32 v25, v22, v40, 16
|
||||
; SI-NEXT: v_alignbit_b32 v40, v15, v30, 16
|
||||
; SI-NEXT: v_alignbit_b32 v30, v16, v59, 16
|
||||
; SI-NEXT: v_alignbit_b32 v27, v53, v60, 16
|
||||
; SI-NEXT: v_mov_b32_e32 v60, v37
|
||||
; SI-NEXT: v_alignbit_b32 v26, v34, v26, 16
|
||||
; SI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
|
||||
; SI-NEXT: .LBB58_2: ; %end
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v38, 16, v56
|
||||
; SI-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||
@ -47560,7 +47561,7 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v37, 0xffff, v7
|
||||
; SI-NEXT: v_and_b32_e32 v37, 0xffff, v5
|
||||
; SI-NEXT: v_or_b32_e32 v37, v37, v38
|
||||
; SI-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen
|
||||
; SI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
|
||||
@ -47584,8 +47585,10 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_i32_e32 v3, vcc, 16, v0
|
||||
; SI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v5
|
||||
; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v8
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
||||
; SI-NEXT: v_or_b32_e32 v2, v2, v3
|
||||
; SI-NEXT: v_add_i32_e32 v3, vcc, 20, v0
|
||||
; SI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
|
||||
@ -47616,7 +47619,7 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v55
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v14
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v43
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 40, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
@ -47658,7 +47661,7 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v15
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v61
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v14
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x44, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
@ -47675,7 +47678,7 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x4c, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v49
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v61
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v29
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x50, v0
|
||||
@ -47687,7 +47690,7 @@ define <56 x i16> @bitcast_v56f16_to_v56i16(<56 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x54, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v58
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v49
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v28
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x58, v0
|
||||
|
||||
@ -51080,79 +51080,79 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:84
|
||||
; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:88
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v55, v3
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v3, v22
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v40, v4
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v52, v6
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v60, v10
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v2
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v6, v13
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v15
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v2, v19
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v3
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v55, v7
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v53, v8
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v8, v9
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v5
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v48, v11
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v49, v12
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v38, v16
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v14
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v17, v17
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v20, v20
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v18
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v16, v22
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v5
|
||||
; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v53, v8
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v8, v10
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v1, v6
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v49, v12
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v6, v13
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v37, v15
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v60, v2
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v2, v18
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v52, v7
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v7, v9
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v48, v11
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v14, v14
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v38, v16
|
||||
; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v2, v19
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v20, v20
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v11, v23
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v5, v24
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v18, v25
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v25
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v24, v26
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v26, v27
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v27, v28
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v29
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v29, v29
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v28, v30
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v17, v17
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: s_waitcnt vmcnt(14)
|
||||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v51
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v22, v43
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v23, v44
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v29, v61
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v61, v61
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v44, v62
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v62, v63
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v18, v63
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v19, v33
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v61, v36
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v33, v36
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v43, v39
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v15, v50
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v63, v54
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v16, v54
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v54, v41
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v51, v42
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v14, v45
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v13, v46
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v12, v45
|
||||
; SI-NEXT: s_waitcnt vmcnt(13)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v50, v47
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v13, v46
|
||||
; SI-NEXT: s_waitcnt vmcnt(12)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v30, v56
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v50, v47
|
||||
; SI-NEXT: s_waitcnt vmcnt(11)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v12, v57
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v30, v56
|
||||
; SI-NEXT: s_waitcnt vmcnt(10)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v7, v58
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v63, v57
|
||||
; SI-NEXT: s_waitcnt vmcnt(9)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v36, v31
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v62, v58
|
||||
; SI-NEXT: s_waitcnt vmcnt(8)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v45, v32
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v36, v31
|
||||
; SI-NEXT: s_waitcnt vmcnt(7)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v31, v59
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v45, v32
|
||||
; SI-NEXT: s_waitcnt vmcnt(6)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v32, v34
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v31, v59
|
||||
; SI-NEXT: s_waitcnt vmcnt(5)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v33, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v32, v34
|
||||
; SI-NEXT: s_waitcnt vmcnt(4)
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v3, v35
|
||||
; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:92
|
||||
; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:96
|
||||
; SI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:100
|
||||
@ -51177,21 +51177,11 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
|
||||
; SI-NEXT: s_or_saveexec_b64 s[4:5], s[4:5]
|
||||
; SI-NEXT: v_mov_b32_e32 v58, v8
|
||||
; SI-NEXT: v_mov_b32_e32 v8, v60
|
||||
; SI-NEXT: v_mov_b32_e32 v46, v52
|
||||
; SI-NEXT: v_mov_b32_e32 v52, v55
|
||||
; SI-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
|
||||
; SI-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_mov_b32_e32 v47, v21
|
||||
; SI-NEXT: v_mov_b32_e32 v56, v17
|
||||
; SI-NEXT: v_mov_b32_e32 v57, v6
|
||||
; SI-NEXT: v_mov_b32_e32 v59, v61
|
||||
; SI-NEXT: v_mov_b32_e32 v61, v29
|
||||
; SI-NEXT: v_mov_b32_e32 v29, v25
|
||||
; SI-NEXT: v_mov_b32_e32 v25, v18
|
||||
; SI-NEXT: v_mov_b32_e32 v21, v16
|
||||
; SI-NEXT: v_mov_b32_e32 v17, v1
|
||||
; SI-NEXT: v_mov_b32_e32 v58, v7
|
||||
; SI-NEXT: v_mov_b32_e32 v59, v33
|
||||
; SI-NEXT: s_xor_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: s_cbranch_execz .LBB58_2
|
||||
; SI-NEXT: ; %bb.1: ; %cmp.true
|
||||
@ -51201,14 +51191,12 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v9, v9
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v32, v32
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v31, v31
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v7, v7
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v7, v62
|
||||
; SI-NEXT: v_add_f32_e32 v4, 0x38000000, v4
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v6, v12
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v13, v13
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v6, v63
|
||||
; SI-NEXT: v_add_f32_e32 v3, 0x38000000, v3
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
||||
; SI-NEXT: v_add_f32_e32 v10, 0x38000000, v10
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v12, v14
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v10, v10
|
||||
; SI-NEXT: v_add_f32_e32 v9, 0x38000000, v9
|
||||
@ -51218,36 +51206,35 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_f32_e32 v31, 0x38000000, v31
|
||||
; SI-NEXT: v_add_f32_e32 v7, 0x38000000, v7
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v31, v31
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v62, v7
|
||||
; SI-NEXT: v_add_f32_e32 v6, 0x38000000, v6
|
||||
; SI-NEXT: v_add_f32_e32 v13, 0x38000000, v13
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v4
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
|
||||
; SI-NEXT: v_add_f32_e32 v12, 0x38000000, v12
|
||||
; SI-NEXT: v_or_b32_e32 v3, v3, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v10
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
|
||||
; SI-NEXT: v_or_b32_e32 v9, v9, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v32
|
||||
; SI-NEXT: v_or_b32_e32 v1, v31, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v7
|
||||
; SI-NEXT: v_or_b32_e32 v6, v6, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v13
|
||||
; SI-NEXT: v_or_b32_e32 v31, v12, v34
|
||||
; SI-NEXT: v_mov_b32_e32 v12, v6
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v16, v63
|
||||
; SI-NEXT: v_or_b32_e32 v31, v31, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v62
|
||||
; SI-NEXT: v_or_b32_e32 v63, v6, v34
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v13, v13
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v12, v12
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v16, v16
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v15, v15
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v19, v19
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v18, v62
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v18, v18
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v23, v23
|
||||
; SI-NEXT: v_add_f32_e32 v13, 0x38000000, v13
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v22, v22
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v27, v27
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
|
||||
; SI-NEXT: v_add_f32_e32 v12, 0x38000000, v12
|
||||
; SI-NEXT: v_add_f32_e32 v16, 0x38000000, v16
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v26, v26
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v63, v16
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v16, v16
|
||||
; SI-NEXT: v_add_f32_e32 v15, 0x38000000, v15
|
||||
; SI-NEXT: v_add_f32_e32 v19, 0x38000000, v19
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v11, v11
|
||||
@ -51261,11 +51248,13 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v23, v23
|
||||
; SI-NEXT: v_add_f32_e32 v22, 0x38000000, v22
|
||||
; SI-NEXT: v_add_f32_e32 v27, 0x38000000, v27
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v13
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v22, v22
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v27, v27
|
||||
; SI-NEXT: v_add_f32_e32 v26, 0x38000000, v26
|
||||
; SI-NEXT: v_add_f32_e32 v5, 0x38000000, v5
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v63
|
||||
; SI-NEXT: v_or_b32_e32 v12, v12, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v16
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v26, v26
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
|
||||
; SI-NEXT: v_add_f32_e32 v11, 0x38000000, v11
|
||||
@ -51275,7 +51264,7 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v11, v11
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v20, v20
|
||||
; SI-NEXT: v_add_f32_e32 v2, 0x38000000, v2
|
||||
; SI-NEXT: v_or_b32_e32 v62, v18, v34
|
||||
; SI-NEXT: v_or_b32_e32 v18, v18, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v23
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
||||
; SI-NEXT: v_or_b32_e32 v22, v22, v34
|
||||
@ -51287,9 +51276,8 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_or_b32_e32 v2, v2, v34
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v34, v38
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v35, v37
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v1, v46
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v8, v8
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v34
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v38, v34
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v35
|
||||
@ -51301,89 +51289,79 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v34, v49
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v35, v48
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v8, v8
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v46, v58
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v34
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v49, v34
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v34, v34
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v46, v58
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v35, 16, v49
|
||||
; SI-NEXT: v_add_f32_e32 v8, 0x38000000, v8
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v35, 16, v49
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v8, v8
|
||||
; SI-NEXT: v_or_b32_e32 v48, v34, v35
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v34, v53
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v35, v52
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v8, v8
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v17, v17
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v14, v14
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v34
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v53, v34
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v34, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
||||
; SI-NEXT: v_add_f32_e32 v14, 0x38000000, v14
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v35, 16, v53
|
||||
; SI-NEXT: v_add_f32_e32 v17, 0x38000000, v17
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v14, v14
|
||||
; SI-NEXT: v_or_b32_e32 v52, v34, v35
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v34, v40
|
||||
; SI-NEXT: s_waitcnt vmcnt(3)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v35, v55
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v17, v17
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v43, v43
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v42, v42
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v34
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v40, v34
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v34, v34
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v17, 16, v17
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v24, v24
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v35, 16, v40
|
||||
; SI-NEXT: v_add_f32_e32 v43, 0x38000000, v43
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v28, v28
|
||||
; SI-NEXT: v_or_b32_e32 v55, v34, v35
|
||||
; SI-NEXT: s_waitcnt vmcnt(2)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v34, v60
|
||||
; SI-NEXT: s_waitcnt vmcnt(1)
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v35, v6
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v43, v43
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v43, v43
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v51, v51
|
||||
; SI-NEXT: v_add_f32_e32 v34, 0x38000000, v34
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v34, v34
|
||||
; SI-NEXT: v_add_f32_e32 v35, 0x38000000, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v35, v35
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v43, 16, v43
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v34
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_or_b32_e32 v6, v35, v34
|
||||
; SI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v24, v24
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v28, v28
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v51, v51
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v30, v30
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v42, v42
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v34, 16, v34
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v41, v41
|
||||
; SI-NEXT: v_or_b32_e32 v6, v35, v34
|
||||
; SI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v25, v25
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v29, v29
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v54, v54
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v50, v50
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v36, v36
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v33, v33
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v39, v39
|
||||
; SI-NEXT: v_add_f32_e32 v42, 0x38000000, v42
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v42, v42
|
||||
; SI-NEXT: v_add_f32_e32 v24, 0x38000000, v24
|
||||
; SI-NEXT: v_add_f32_e32 v28, 0x38000000, v28
|
||||
; SI-NEXT: v_add_f32_e32 v43, 0x38000000, v43
|
||||
; SI-NEXT: v_add_f32_e32 v51, 0x38000000, v51
|
||||
; SI-NEXT: v_add_f32_e32 v30, 0x38000000, v30
|
||||
; SI-NEXT: v_add_f32_e32 v42, 0x38000000, v42
|
||||
; SI-NEXT: v_add_f32_e32 v41, 0x38000000, v41
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v24, v24
|
||||
; SI-NEXT: v_add_f32_e32 v25, 0x38000000, v25
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v28, v28
|
||||
; SI-NEXT: v_add_f32_e32 v29, 0x38000000, v29
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v43, v43
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v51, v51
|
||||
; SI-NEXT: v_add_f32_e32 v54, 0x38000000, v54
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v30, v30
|
||||
; SI-NEXT: v_add_f32_e32 v50, 0x38000000, v50
|
||||
; SI-NEXT: v_add_f32_e32 v36, 0x38000000, v36
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v42, v42
|
||||
; SI-NEXT: v_add_f32_e32 v33, 0x38000000, v33
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v41, v41
|
||||
; SI-NEXT: v_add_f32_e32 v39, 0x38000000, v39
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v25, v25
|
||||
@ -51391,91 +51369,102 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v54, v54
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v50, v50
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v36, v36
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v33, v33
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v39, v39
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v42, 16, v42
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v24, 16, v24
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v28, 16, v28
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v43, 16, v43
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v51, 16, v51
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v30, 16, v30
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v42, 16, v42
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v41, 16, v41
|
||||
; SI-NEXT: v_or_b32_e32 v25, v25, v24
|
||||
; SI-NEXT: v_or_b32_e32 v29, v29, v28
|
||||
; SI-NEXT: v_or_b32_e32 v54, v54, v51
|
||||
; SI-NEXT: v_or_b32_e32 v50, v50, v30
|
||||
; SI-NEXT: v_or_b32_e32 v33, v33, v42
|
||||
; SI-NEXT: v_or_b32_e32 v39, v39, v41
|
||||
; SI-NEXT: v_alignbit_b32 v60, v55, v34, 16
|
||||
; SI-NEXT: v_alignbit_b32 v24, v26, v24, 16
|
||||
; SI-NEXT: v_alignbit_b32 v28, v22, v28, 16
|
||||
; SI-NEXT: v_alignbit_b32 v30, v12, v30, 16
|
||||
; SI-NEXT: v_alignbit_b32 v42, v9, v42, 16
|
||||
; SI-NEXT: v_alignbit_b32 v51, v12, v51, 16
|
||||
; SI-NEXT: v_alignbit_b32 v30, v63, v30, 16
|
||||
; SI-NEXT: v_alignbit_b32 v41, v3, v41, 16
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v35, v6
|
||||
; SI-NEXT: v_add_f32_e32 v35, 0x38000000, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v35, v35
|
||||
; SI-NEXT: v_or_b32_e32 v6, v35, v1
|
||||
; SI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
|
||||
; SI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_add_f32_e32 v35, 0x38000000, v46
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v46, v57
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v35, v35
|
||||
; SI-NEXT: v_alignbit_b32 v1, v52, v1, 16
|
||||
; SI-NEXT: v_add_f32_e32 v46, 0x38000000, v46
|
||||
; SI-NEXT: v_or_b32_e32 v58, v35, v8
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v35, v56
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v46, v46
|
||||
; SI-NEXT: v_alignbit_b32 v8, v48, v8, 16
|
||||
; SI-NEXT: v_add_f32_e32 v35, 0x38000000, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v35, v35
|
||||
; SI-NEXT: v_or_b32_e32 v56, v35, v17
|
||||
; SI-NEXT: v_alignbit_b32 v17, v2, v17, 16
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v14, v6
|
||||
; SI-NEXT: v_add_f32_e32 v14, 0x38000000, v14
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v14, v14
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
||||
; SI-NEXT: v_or_b32_e32 v57, v46, v14
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v46, v47
|
||||
; SI-NEXT: v_add_f32_e32 v35, 0x38000000, v46
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v46, v59
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v35, v35
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v46, v47
|
||||
; SI-NEXT: v_alignbit_b32 v14, v37, v14, 16
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v17, v6
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_add_f32_e32 v17, 0x38000000, v17
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v17, v17
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v17, 16, v17
|
||||
; SI-NEXT: v_or_b32_e32 v56, v35, v17
|
||||
; SI-NEXT: v_add_f32_e32 v35, 0x38000000, v46
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v35, v35
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v46, v59
|
||||
; SI-NEXT: v_add_f32_e32 v46, 0x38000000, v46
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v46, v46
|
||||
; SI-NEXT: v_or_b32_e32 v59, v46, v43
|
||||
; SI-NEXT: v_alignbit_b32 v43, v15, v43, 16
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v21, v6
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_add_f32_e32 v21, 0x38000000, v21
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v21, v21
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v21
|
||||
; SI-NEXT: v_or_b32_e32 v47, v35, v21
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v35, v44
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v44, v61
|
||||
; SI-NEXT: v_or_b32_e32 v59, v46, v43
|
||||
; SI-NEXT: v_alignbit_b32 v46, v52, v1, 16
|
||||
; SI-NEXT: v_alignbit_b32 v1, v37, v14, 16
|
||||
; SI-NEXT: v_mov_b32_e32 v14, v31
|
||||
; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_add_f32_e32 v35, 0x38000000, v35
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v35, v35
|
||||
; SI-NEXT: v_add_f32_e32 v44, 0x38000000, v44
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v44, v44
|
||||
; SI-NEXT: v_alignbit_b32 v21, v11, v21, 16
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v35, 16, v35
|
||||
; SI-NEXT: v_alignbit_b32 v43, v15, v43, 16
|
||||
; SI-NEXT: v_or_b32_e32 v61, v44, v35
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v44, v45
|
||||
; SI-NEXT: v_alignbit_b32 v51, v14, v51, 16
|
||||
; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
|
||||
; SI-NEXT: v_add_f32_e32 v44, 0x38000000, v44
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v44, v44
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v45, 16, v44
|
||||
; SI-NEXT: v_or_b32_e32 v36, v36, v45
|
||||
; SI-NEXT: v_alignbit_b32 v44, v62, v35, 16
|
||||
; SI-NEXT: s_waitcnt vmcnt(1)
|
||||
; SI-NEXT: v_alignbit_b32 v44, v18, v35, 16
|
||||
; SI-NEXT: v_alignbit_b32 v45, v31, v45, 16
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_cvt_f32_f16_e32 v33, v6
|
||||
; SI-NEXT: v_add_f32_e32 v33, 0x38000000, v33
|
||||
; SI-NEXT: v_cvt_f16_f32_e32 v33, v33
|
||||
; SI-NEXT: v_or_b32_e32 v6, v33, v42
|
||||
; SI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_alignbit_b32 v6, v2, v17, 16
|
||||
; SI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_alignbit_b32 v6, v11, v21, 16
|
||||
; SI-NEXT: v_alignbit_b32 v42, v9, v42, 16
|
||||
; SI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
|
||||
; SI-NEXT: .LBB58_2: ; %end
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
|
||||
; SI-NEXT: s_waitcnt vmcnt(1)
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v35, 16, v60
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v46
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v35, 16, v60
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v34, 0xffff, v6
|
||||
@ -51487,7 +51476,7 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_or_b32_e32 v34, v34, v35
|
||||
; SI-NEXT: v_add_i32_e32 v35, vcc, 4, v0
|
||||
; SI-NEXT: buffer_store_dword v34, v35, s[0:3], 0 offen
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v34, 0xffff, v6
|
||||
; SI-NEXT: v_or_b32_e32 v1, v34, v1
|
||||
@ -51510,11 +51499,9 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v8
|
||||
; SI-NEXT: v_add_i32_e32 v8, vcc, 20, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v8, s[0:3], 0 offen
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v57
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v8, 16, v6
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v8, 16, v14
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v8
|
||||
; SI-NEXT: v_add_i32_e32 v8, vcc, 24, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v8, s[0:3], 0 offen
|
||||
@ -51524,9 +51511,11 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v8
|
||||
; SI-NEXT: v_add_i32_e32 v8, vcc, 28, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v8, s[0:3], 0 offen
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v56
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v8, 16, v17
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v8, 16, v6
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v8
|
||||
; SI-NEXT: v_add_i32_e32 v8, vcc, 32, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v8, s[0:3], 0 offen
|
||||
@ -51536,9 +51525,11 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 36, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v47
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v21
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 40, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
@ -51579,7 +51570,7 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 64, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v62
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v18
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v19
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x44, v0
|
||||
@ -51592,7 +51583,7 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v15
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v63
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v16
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x4c, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
@ -51603,7 +51594,7 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x50, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v14
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v12
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v13
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x54, v0
|
||||
@ -51615,8 +51606,8 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x58, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v12
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v7
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v63
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v62
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x5c, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
@ -51633,8 +51624,10 @@ define <60 x i16> @bitcast_v60f16_to_v60i16(<60 x half> %a, i32 %b) {
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x64, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v33
|
||||
; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v42
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||
; SI-NEXT: v_or_b32_e32 v1, v1, v2
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 0x68, v0
|
||||
; SI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
|
||||
|
||||
@ -42,11 +42,14 @@ body: |
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $sgpr18_sgpr19 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr20_sgpr21 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr22_sgpr23 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.3, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0
|
||||
; CHECK-NEXT: renamable $sgpr24_sgpr25 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 1083786240
|
||||
@ -55,7 +58,7 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.17(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]]
|
||||
@ -64,7 +67,7 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.5(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr72
|
||||
@ -92,12 +95,12 @@ body: |
|
||||
; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
|
||||
; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr56_sgpr57
|
||||
; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr76
|
||||
; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58 = COPY renamable $sgpr52_sgpr53_sgpr54
|
||||
; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51
|
||||
; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
|
||||
; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54 = COPY renamable $sgpr56_sgpr57_sgpr58
|
||||
; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr76
|
||||
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51
|
||||
; CHECK-NEXT: renamable $sgpr48_sgpr49_sgpr50 = COPY renamable $sgpr52_sgpr53_sgpr54
|
||||
; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54 = COPY renamable $sgpr48_sgpr49_sgpr50
|
||||
; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
|
||||
; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr68
|
||||
; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72
|
||||
; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
|
||||
@ -162,22 +165,23 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.5:
|
||||
; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.6(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 renamable $sgpr22_sgpr23, undef renamable $sgpr54_sgpr55, implicit-def dead $scc
|
||||
; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.3, align 4, addrspace 5)
|
||||
; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 killed renamable $sgpr12_sgpr13, undef renamable $sgpr54_sgpr55, implicit-def dead $scc
|
||||
; CHECK-NEXT: renamable $sgpr54_sgpr55 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
|
||||
; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr12_sgpr13
|
||||
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.6:
|
||||
; CHECK-NEXT: successors: %bb.7(0x80000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr34_sgpr35, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.7:
|
||||
; CHECK-NEXT: successors: %bb.8(0x80000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $sgpr64_sgpr65 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: renamable $sgpr66_sgpr67 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
|
||||
@ -185,14 +189,14 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.8:
|
||||
; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr64_sgpr65, implicit-def dead $scc
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.10, implicit $vcc
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.9:
|
||||
; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.17(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr84_sgpr85, implicit $exec
|
||||
@ -214,17 +218,11 @@ body: |
|
||||
; CHECK-NEXT: renamable $sgpr83 = COPY killed renamable $sgpr15
|
||||
; CHECK-NEXT: renamable $sgpr85 = COPY killed renamable $sgpr14
|
||||
; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr18_sgpr19
|
||||
; CHECK-NEXT: renamable $sgpr50_sgpr51 = COPY killed renamable $sgpr20_sgpr21
|
||||
; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY killed renamable $sgpr22_sgpr23
|
||||
; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY killed renamable $sgpr24_sgpr25
|
||||
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
|
||||
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83
|
||||
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
|
||||
; CHECK-NEXT: renamable $sgpr24_sgpr25 = COPY killed renamable $sgpr38_sgpr39
|
||||
; CHECK-NEXT: renamable $sgpr22_sgpr23 = COPY killed renamable $sgpr36_sgpr37
|
||||
; CHECK-NEXT: renamable $sgpr20_sgpr21 = COPY killed renamable $sgpr50_sgpr51
|
||||
; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr48_sgpr49
|
||||
; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr85
|
||||
; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr83
|
||||
@ -240,42 +238,44 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.10:
|
||||
; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.12(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.12
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.11:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.17(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.17
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.12:
|
||||
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.13(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr54_sgpr55
|
||||
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.13:
|
||||
; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.14(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr24_sgpr25, implicit-def dead $scc
|
||||
; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
|
||||
; CHECK-NEXT: $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.15, implicit $vcc
|
||||
; CHECK-NEXT: S_BRANCH %bb.14
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.14:
|
||||
; CHECK-NEXT: successors: %bb.15(0x80000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.15:
|
||||
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.16(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr20_sgpr21, implicit-def dead $scc
|
||||
; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
|
||||
; CHECK-NEXT: $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
|
||||
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit $vcc
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.16:
|
||||
|
||||
@ -42,13 +42,11 @@ body: |
|
||||
%24:sgpr_128 = COPY %1
|
||||
%25:sgpr_128 = COPY %1
|
||||
%26:sgpr_128 = COPY %1
|
||||
%27:sgpr_128 = COPY %1
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
|
||||
|
||||
%27 = IMPLICIT_DEF implicit-def $exec
|
||||
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
||||
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
@ -57,7 +55,6 @@ body: |
|
||||
bb.2:
|
||||
liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
|
||||
|
||||
%27 = IMPLICIT_DEF implicit-def $exec
|
||||
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
||||
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
@ -66,7 +63,6 @@ body: |
|
||||
bb.3:
|
||||
liveins: $sgpr102_sgpr103
|
||||
|
||||
%27 = IMPLICIT_DEF implicit-def $exec
|
||||
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
||||
S_BRANCH %bb.4
|
||||
@ -85,7 +81,6 @@ body: |
|
||||
S_CMP_EQ_U64 %21.sub0_sub1, %22.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %23.sub0_sub1, %24.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %25.sub0_sub1, %26.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %26.sub0_sub1, %27.sub2_sub3, implicit-def $scc
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit %0, implicit $vgpr0
|
||||
...
|
||||
|
||||
@ -1,167 +0,0 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
|
||||
# RUN: llc -mtriple=amdgcn-- -verify-machineinstrs -run-pass=greedy -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
# Check that spill save/restore should be inserted after $exec mask is defined.
|
||||
|
||||
name: foo
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
body: |
|
||||
; CHECK-LABEL: name: foo
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr102_sgpr103
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5)
|
||||
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.1, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.1, align 4, addrspace 5)
|
||||
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5)
|
||||
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.3, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.3, align 4, addrspace 5)
|
||||
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.4, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.4, align 4, addrspace 5)
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY19:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY20:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: [[COPY21:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: S_BRANCH %bb.1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_1:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_1]], implicit-def $scc
|
||||
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.4
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x80000000)
|
||||
; CHECK-NEXT: liveins: $sgpr102_sgpr103
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_1:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_1]], implicit-def $scc
|
||||
; CHECK-NEXT: S_BRANCH %bb.4
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.4:
|
||||
; CHECK-NEXT: $exec = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5)
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY1]].sub0_sub1, [[SI_SPILL_S128_RESTORE]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE1:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5)
|
||||
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE2:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5)
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[SI_SPILL_S128_RESTORE1]].sub0_sub1, [[SI_SPILL_S128_RESTORE2]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE3:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.3, align 4, addrspace 5)
|
||||
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE4:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.4, align 4, addrspace 5)
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[SI_SPILL_S128_RESTORE3]].sub0_sub1, [[SI_SPILL_S128_RESTORE4]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY2]].sub0_sub1, [[COPY3]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY4]].sub0_sub1, [[COPY5]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY6]].sub0_sub1, [[COPY7]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY8]].sub0_sub1, [[COPY9]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY10]].sub0_sub1, [[COPY11]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY12]].sub0_sub1, [[COPY13]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY14]].sub0_sub1, [[COPY15]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY16]].sub0_sub1, [[COPY17]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY18]].sub0_sub1, [[COPY19]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY20]].sub0_sub1, [[COPY21]].sub2_sub3, implicit-def $scc
|
||||
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit [[S_OR_SAVEEXEC_B64_1]], implicit $vgpr0
|
||||
bb.0:
|
||||
liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103
|
||||
|
||||
%0:sreg_64 = COPY $sgpr102_sgpr103
|
||||
%1:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
%2:sgpr_128 = COPY %1
|
||||
%3:sgpr_128 = COPY %1
|
||||
%4:sgpr_128 = COPY %1
|
||||
%5:sgpr_128 = COPY %1
|
||||
%6:sgpr_128 = COPY %1
|
||||
%7:sgpr_128 = COPY %1
|
||||
%8:sgpr_128 = COPY %1
|
||||
%9:sgpr_128 = COPY %1
|
||||
%10:sgpr_128 = COPY %1
|
||||
%11:sgpr_128 = COPY %1
|
||||
%12:sgpr_128 = COPY %1
|
||||
%13:sgpr_128 = COPY %1
|
||||
%14:sgpr_128 = COPY %1
|
||||
%15:sgpr_128 = COPY %1
|
||||
%16:sgpr_128 = COPY %1
|
||||
%17:sgpr_128 = COPY %1
|
||||
%18:sgpr_128 = COPY %1
|
||||
%19:sgpr_128 = COPY %1
|
||||
%20:sgpr_128 = COPY %1
|
||||
%21:sgpr_128 = COPY %1
|
||||
%22:sgpr_128 = COPY %1
|
||||
%23:sgpr_128 = COPY %1
|
||||
%24:sgpr_128 = COPY %1
|
||||
%25:sgpr_128 = COPY %1
|
||||
%26:sgpr_128 = COPY %1
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
|
||||
|
||||
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
||||
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
|
||||
|
||||
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
||||
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
S_BRANCH %bb.4
|
||||
|
||||
bb.3:
|
||||
liveins: $sgpr102_sgpr103
|
||||
|
||||
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
||||
S_BRANCH %bb.4
|
||||
|
||||
bb.4:
|
||||
$exec = IMPLICIT_DEF
|
||||
S_CMP_EQ_U64 %1.sub0_sub1, %2.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %3.sub0_sub1, %4.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %5.sub0_sub1, %6.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %7.sub0_sub1, %8.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %9.sub0_sub1, %10.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %11.sub0_sub1, %12.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %13.sub0_sub1, %14.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %15.sub0_sub1, %16.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %17.sub0_sub1, %18.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %19.sub0_sub1, %20.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %21.sub0_sub1, %22.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %23.sub0_sub1, %24.sub2_sub3, implicit-def $scc
|
||||
S_CMP_EQ_U64 %25.sub0_sub1, %26.sub2_sub3, implicit-def $scc
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit %0, implicit $vgpr0
|
||||
...
|
||||
@ -9742,122 +9742,170 @@ entry:
|
||||
define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
|
||||
; GFX6-LABEL: test_limited_sgpr:
|
||||
; GFX6: ; %bb.0: ; %entry
|
||||
; GFX6-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x9
|
||||
; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
|
||||
; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v0, -1, v0
|
||||
; GFX6-NEXT: s_mov_b32 s18, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s19, 0xf000
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 s[16:17], s[14:15]
|
||||
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 8, v0
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:240
|
||||
; GFX6-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
|
||||
; GFX6-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
|
||||
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
|
||||
; GFX6-NEXT: s_mov_b32 s42, -1
|
||||
; GFX6-NEXT: s_mov_b32 s43, 0xe8f000
|
||||
; GFX6-NEXT: s_add_u32 s40, s40, s11
|
||||
; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
|
||||
; GFX6-NEXT: s_addc_u32 s41, s41, 0
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85e00
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[12:15], v[5:6], s[16:19], 0 addr64 offset:32
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[16:19], v[5:6], s[16:19], 0 addr64 offset:48
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(2)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v0, -1, v0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v6, 0
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], exec
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_writelane_b32 v1, s0, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v1, s1, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v1, s2, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v1, s3, 3
|
||||
; GFX6-NEXT: s_mov_b32 s8, 0x80400
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:224
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85a00
|
||||
; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:208
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85600
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
|
||||
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 8, v0
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:240
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x86a00
|
||||
; GFX6-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:192
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85200
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:224
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x86600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:176
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x84e00
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:208
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x86200
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:160
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x84a00
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:192
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x85e00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:144
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x84600
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:176
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x85a00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:128
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x84200
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:160
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x85600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:112
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:144
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x85200
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:128
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x84e00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:112
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x84a00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:96
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x84600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:80
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x84200
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:64
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x83a00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:16
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x83200
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:32
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x83600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s0, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s1, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s2, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s3, 3
|
||||
; GFX6-NEXT: s_mov_b32 s10, 0x80800
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s10 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[8:9]
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:48
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83e00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:96
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83a00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:80
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[20:23], v[5:6], s[16:19], 0 addr64 offset:64
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64
|
||||
; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[16:19], 0 addr64 offset:16
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83200
|
||||
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 13, v0
|
||||
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 16, v4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 13, v0
|
||||
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 16, v4
|
||||
; GFX6-NEXT: s_waitcnt expcnt(3)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v7, 1
|
||||
; GFX6-NEXT: s_mov_b64 s[0:1], exec
|
||||
@ -9876,77 +9924,23 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s9, 5
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s10, 6
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s11, 7
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x80400
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[0:1]
|
||||
; GFX6-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ; def s[4:11]
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s4, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s5, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s6, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s7, 3
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s8, 4
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s9, 5
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s10, 6
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s11, 7
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x80c00
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[0:1]
|
||||
; GFX6-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ; def s[4:11]
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s4, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s5, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s6, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s7, 3
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s8, 4
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s9, 5
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s10, 6
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s11, 7
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x81400
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[0:1]
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ; def s[0:7]
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s0, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s1, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s2, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s3, 3
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s4, 4
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s5, 5
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s6, 6
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s7, 7
|
||||
; GFX6-NEXT: s_mov_b32 s10, 0x81c00
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s10 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[8:9]
|
||||
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ; def s[8:15]
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ; def s[16:23]
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ; def s[24:31]
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ; def s[0:3]
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
@ -9956,28 +9950,33 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: ; def s33
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; GFX6-NEXT: s_mov_b64 vcc, s[6:7]
|
||||
; GFX6-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX6-NEXT: ; %bb.1: ; %bb0
|
||||
; GFX6-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s12, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s13, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s14, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s15, 3
|
||||
; GFX6-NEXT: s_mov_b32 s10, 0x82400
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s10 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s8, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s9, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s10, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s11, 3
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s12, 4
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s13, 5
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s14, 6
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s15, 7
|
||||
; GFX6-NEXT: s_mov_b32 s34, 0x81400
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[8:9]
|
||||
; GFX6-NEXT: s_mov_b64 s[20:21], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s22, 0x80400
|
||||
; GFX6-NEXT: s_mov_b32 s34, 0x80c00
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s22 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_readlane_b32 s8, v4, 0
|
||||
; GFX6-NEXT: v_readlane_b32 s9, v4, 1
|
||||
@ -9989,27 +9988,31 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: v_readlane_b32 s15, v4, 7
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[20:21]
|
||||
; GFX6-NEXT: s_mov_b64 s[20:21], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s16, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s17, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s18, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s19, 3
|
||||
; GFX6-NEXT: s_mov_b32 s22, 0x82c00
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s22 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s20, 4
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s21, 5
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s22, 6
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s23, 7
|
||||
; GFX6-NEXT: s_mov_b32 s34, 0x81c00
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[20:21]
|
||||
; GFX6-NEXT: s_mov_b64 s[24:25], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s26, 0x80c00
|
||||
; GFX6-NEXT: s_mov_b32 s34, 0x81400
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s26 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_readlane_b32 s16, v4, 0
|
||||
; GFX6-NEXT: v_readlane_b32 s17, v4, 1
|
||||
@ -10021,13 +10024,31 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: v_readlane_b32 s23, v4, 7
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[24:25]
|
||||
; GFX6-NEXT: s_mov_b64 s[34:35], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s36, 0x81400
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s36 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s24, 0
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s25, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s26, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s27, 3
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s28, 4
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s29, 5
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s30, 6
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s31, 7
|
||||
; GFX6-NEXT: s_mov_b32 s34, 0x82400
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s34, 0x81c00
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_readlane_b32 s24, v4, 0
|
||||
; GFX6-NEXT: v_readlane_b32 s25, v4, 1
|
||||
@ -10039,8 +10060,8 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: v_readlane_b32 s31, v4, 7
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[34:35]
|
||||
; GFX6-NEXT: s_mov_b64 s[34:35], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 s[6:7], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -10048,12 +10069,12 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s1, 1
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s2, 2
|
||||
; GFX6-NEXT: v_writelane_b32 v4, s3, 3
|
||||
; GFX6-NEXT: s_mov_b32 s36, 0x82800
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s36 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_mov_b32 s34, 0x82c00
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[34:35]
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 3
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
@ -10066,11 +10087,10 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[0:1]
|
||||
; GFX6-NEXT: s_mov_b64 vcc, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b64 s[34:35], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 0xff
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s36, 0x81c00
|
||||
; GFX6-NEXT: s_mov_b32 s36, 0x82400
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s36 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -10088,7 +10108,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: s_mov_b64 s[34:35], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s44, 0x82800
|
||||
; GFX6-NEXT: s_mov_b32 s44, 0x82c00
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s44 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -10114,79 +10134,8 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ; use s[8:15],s[16:23],s[24:31],s[0:7],s[36:39],s[34:35]
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: s_mov_b64 s[6:7], vcc
|
||||
; GFX6-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x82c00
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s2 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_readlane_b32 s16, v4, 0
|
||||
; GFX6-NEXT: v_readlane_b32 s17, v4, 1
|
||||
; GFX6-NEXT: v_readlane_b32 s18, v4, 2
|
||||
; GFX6-NEXT: v_readlane_b32 s19, v4, 3
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[0:1]
|
||||
; GFX6-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0x82400
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s2 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_readlane_b32 s12, v4, 0
|
||||
; GFX6-NEXT: v_readlane_b32 s13, v4, 1
|
||||
; GFX6-NEXT: v_readlane_b32 s14, v4, 2
|
||||
; GFX6-NEXT: v_readlane_b32 s15, v4, 3
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[0:1]
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x86200
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x86600
|
||||
; GFX6-NEXT: buffer_store_dword v12, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v13, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v14, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v15, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x86a00
|
||||
; GFX6-NEXT: s_waitcnt expcnt(4)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v20
|
||||
; GFX6-NEXT: buffer_store_dword v16, off, s[40:43], s0 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v17, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v18, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: buffer_store_dword v19, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, v21
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, v22
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, v23
|
||||
; GFX6-NEXT: s_waitcnt expcnt(3)
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: buffer_load_dword v16, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt expcnt(2)
|
||||
; GFX6-NEXT: buffer_load_dword v17, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt expcnt(1)
|
||||
; GFX6-NEXT: buffer_load_dword v18, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v19, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x86600
|
||||
; GFX6-NEXT: v_mov_b32_e32 v23, v3
|
||||
; GFX6-NEXT: buffer_load_dword v12, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v13, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v14, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v15, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x86200
|
||||
; GFX6-NEXT: v_mov_b32_e32 v22, v2
|
||||
; GFX6-NEXT: v_mov_b32_e32 v21, v1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v20, v0
|
||||
; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
@ -10200,16 +10149,69 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: ;;#ASMSTART
|
||||
; GFX6-NEXT: ;;#ASMEND
|
||||
; GFX6-NEXT: .LBB1_2: ; %ret
|
||||
; GFX6-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85e00
|
||||
; GFX6-NEXT: s_or_b64 exec, exec, vcc
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0x80400
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s6 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_readlane_b32 s0, v4, 0
|
||||
; GFX6-NEXT: v_readlane_b32 s1, v4, 1
|
||||
; GFX6-NEXT: v_readlane_b32 s2, v4, 2
|
||||
; GFX6-NEXT: v_readlane_b32 s3, v4, 3
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GFX6-NEXT: s_mov_b64 s[36:37], s[0:1]
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], exec
|
||||
; GFX6-NEXT: s_mov_b64 exec, 15
|
||||
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0x80800
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s6 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_readlane_b32 s0, v4, 0
|
||||
; GFX6-NEXT: v_readlane_b32 s1, v4, 1
|
||||
; GFX6-NEXT: v_readlane_b32 s2, v4, 2
|
||||
; GFX6-NEXT: v_readlane_b32 s3, v4, 3
|
||||
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x86a00
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b64 s[38:39], s[2:3]
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x86600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:240
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x86200
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:224
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85e00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:208
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b64 s[14:15], s[18:19]
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85a00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:240
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:192
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10217,7 +10219,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:224
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:176
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10225,7 +10227,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x85200
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:208
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:160
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10233,7 +10235,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x84e00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:192
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:144
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10241,7 +10243,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x84a00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:176
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:128
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10249,7 +10251,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x84600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:160
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:112
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10257,15 +10259,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x84200
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:144
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83e00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:128
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:96
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10273,7 +10267,15 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83a00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:112
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:80
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83e00
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:64
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10281,7 +10283,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83600
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:96
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:48
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
@ -10289,18 +10291,15 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0x83200
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:80
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[20:23], v[5:6], s[12:15], 0 addr64 offset:64
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[16:19], v[5:6], s[12:15], 0 addr64 offset:48
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], v[5:6], s[12:15], 0 addr64 offset:32
|
||||
; GFX6-NEXT: s_waitcnt expcnt(3)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:32
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[12:15], 0 addr64 offset:16
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[12:15], 0 addr64
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:16
|
||||
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[36:39], 0 addr64
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX9-FLATSCR-LABEL: test_limited_sgpr:
|
||||
|
||||
@ -31,23 +31,22 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
|
||||
; CHECK-NEXT: .cfi_offset %edi, -16
|
||||
; CHECK-NEXT: .cfi_offset %ebx, -12
|
||||
; CHECK-NEXT: .cfi_offset %ebp, -8
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
|
||||
; CHECK-NEXT: testb $1, %bl
|
||||
; CHECK-NEXT: je LBB0_7
|
||||
; CHECK-NEXT: je LBB0_25
|
||||
; CHECK-NEXT: ## %bb.1: ## %bb116.i
|
||||
; CHECK-NEXT: je LBB0_7
|
||||
; CHECK-NEXT: je LBB0_25
|
||||
; CHECK-NEXT: ## %bb.2: ## %bb52.i.i
|
||||
; CHECK-NEXT: je LBB0_7
|
||||
; CHECK-NEXT: je LBB0_25
|
||||
; CHECK-NEXT: ## %bb.3: ## %bb142.i
|
||||
; CHECK-NEXT: je LBB0_7
|
||||
; CHECK-NEXT: je LBB0_25
|
||||
; CHECK-NEXT: ## %bb.4:
|
||||
; CHECK-NEXT: movl %eax, %esi
|
||||
; CHECK-NEXT: movl L_.str89$non_lazy_ptr, %edi
|
||||
; CHECK-NEXT: movb $1, %bh
|
||||
; CHECK-NEXT: movl L_.str$non_lazy_ptr, %ebp
|
||||
; CHECK-NEXT: jmp LBB0_5
|
||||
; CHECK-NEXT: LBB0_23: ## %bb7806
|
||||
; CHECK-NEXT: LBB0_21: ## %bb7806
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: Ltmp16: ## EH_LABEL
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
@ -58,50 +57,50 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
|
||||
; CHECK-NEXT: LBB0_5: ## %bb3261
|
||||
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: cmpl $37, 0
|
||||
; CHECK-NEXT: jne LBB0_6
|
||||
; CHECK-NEXT: ## %bb.8: ## %bb3306
|
||||
; CHECK-NEXT: jne LBB0_25
|
||||
; CHECK-NEXT: ## %bb.6: ## %bb3306
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: Ltmp0: ## EH_LABEL
|
||||
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl $0, (%esp)
|
||||
; CHECK-NEXT: calll __ZN12wxStringBaseaSEPKw
|
||||
; CHECK-NEXT: Ltmp1: ## EH_LABEL
|
||||
; CHECK-NEXT: ## %bb.9: ## %bb3314
|
||||
; CHECK-NEXT: ## %bb.7: ## %bb3314
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: movl 0, %eax
|
||||
; CHECK-NEXT: cmpl $121, %eax
|
||||
; CHECK-NEXT: ja LBB0_6
|
||||
; CHECK-NEXT: ## %bb.10: ## %bb3314
|
||||
; CHECK-NEXT: ja LBB0_25
|
||||
; CHECK-NEXT: ## %bb.8: ## %bb3314
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: jmpl *LJTI0_0(,%eax,4)
|
||||
; CHECK-NEXT: LBB0_12: ## %bb5809
|
||||
; CHECK-NEXT: LBB0_10: ## %bb5809
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: jne LBB0_6
|
||||
; CHECK-NEXT: ## %bb.13: ## %bb5809
|
||||
; CHECK-NEXT: jne LBB0_25
|
||||
; CHECK-NEXT: ## %bb.11: ## %bb5809
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: testb %bh, %bh
|
||||
; CHECK-NEXT: je LBB0_6
|
||||
; CHECK-NEXT: ## %bb.14: ## %bb91.i8504
|
||||
; CHECK-NEXT: je LBB0_25
|
||||
; CHECK-NEXT: ## %bb.12: ## %bb91.i8504
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: testb $1, %bl
|
||||
; CHECK-NEXT: je LBB0_16
|
||||
; CHECK-NEXT: ## %bb.15: ## %bb155.i8541
|
||||
; CHECK-NEXT: je LBB0_14
|
||||
; CHECK-NEXT: ## %bb.13: ## %bb155.i8541
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: Ltmp4: ## EH_LABEL
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl $0, (%esp)
|
||||
; CHECK-NEXT: calll _gmtime_r
|
||||
; CHECK-NEXT: Ltmp5: ## EH_LABEL
|
||||
; CHECK-NEXT: LBB0_16: ## %bb182.i8560
|
||||
; CHECK-NEXT: LBB0_14: ## %bb182.i8560
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: testb $1, %bl
|
||||
; CHECK-NEXT: je LBB0_17
|
||||
; CHECK-NEXT: ## %bb.18: ## %bb278.i8617
|
||||
; CHECK-NEXT: je LBB0_15
|
||||
; CHECK-NEXT: ## %bb.16: ## %bb278.i8617
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: je LBB0_20
|
||||
; CHECK-NEXT: ## %bb.19: ## %bb440.i8663
|
||||
; CHECK-NEXT: je LBB0_18
|
||||
; CHECK-NEXT: ## %bb.17: ## %bb440.i8663
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: Ltmp6: ## EH_LABEL
|
||||
; CHECK-NEXT: movl L_.str4$non_lazy_ptr, %eax
|
||||
@ -114,11 +113,11 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
|
||||
; CHECK-NEXT: movl $1717, {{[0-9]+}}(%esp) ## imm = 0x6B5
|
||||
; CHECK-NEXT: calll __Z10wxOnAssertPKwiPKcS0_S0_
|
||||
; CHECK-NEXT: Ltmp7: ## EH_LABEL
|
||||
; CHECK-NEXT: jmp LBB0_20
|
||||
; CHECK-NEXT: LBB0_17: ## %bb187.i8591
|
||||
; CHECK-NEXT: jmp LBB0_18
|
||||
; CHECK-NEXT: LBB0_15: ## %bb187.i8591
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: jne LBB0_6
|
||||
; CHECK-NEXT: LBB0_20: ## %invcont5814
|
||||
; CHECK-NEXT: jne LBB0_25
|
||||
; CHECK-NEXT: LBB0_18: ## %invcont5814
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: Ltmp8: ## EH_LABEL
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
@ -127,7 +126,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
|
||||
; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz
|
||||
; CHECK-NEXT: subl $4, %esp
|
||||
; CHECK-NEXT: Ltmp9: ## EH_LABEL
|
||||
; CHECK-NEXT: ## %bb.21: ## %invcont5831
|
||||
; CHECK-NEXT: ## %bb.19: ## %invcont5831
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
|
||||
; CHECK-NEXT: Ltmp10: ## EH_LABEL
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
@ -137,7 +136,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
|
||||
; CHECK-NEXT: calll __ZN12wxStringBase10ConcatSelfEmPKwm
|
||||
; CHECK-NEXT: Ltmp11: ## EH_LABEL
|
||||
; CHECK-NEXT: jmp LBB0_5
|
||||
; CHECK-NEXT: LBB0_11: ## %bb5657
|
||||
; CHECK-NEXT: LBB0_9: ## %bb5657
|
||||
; CHECK-NEXT: Ltmp13: ## EH_LABEL
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
@ -145,8 +144,8 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
|
||||
; CHECK-NEXT: movl %eax, (%esp)
|
||||
; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE
|
||||
; CHECK-NEXT: Ltmp14: ## EH_LABEL
|
||||
; CHECK-NEXT: jmp LBB0_6
|
||||
; CHECK-NEXT: LBB0_22: ## %bb5968
|
||||
; CHECK-NEXT: jmp LBB0_25
|
||||
; CHECK-NEXT: LBB0_20: ## %bb5968
|
||||
; CHECK-NEXT: Ltmp2: ## EH_LABEL
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
@ -154,24 +153,23 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
|
||||
; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz
|
||||
; CHECK-NEXT: subl $4, %esp
|
||||
; CHECK-NEXT: Ltmp3: ## EH_LABEL
|
||||
; CHECK-NEXT: LBB0_6: ## %bb3267
|
||||
; CHECK-NEXT: LBB0_25: ## %bb115.critedge.i
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: LBB0_7: ## %bb115.critedge.i
|
||||
; CHECK-NEXT: addl $28, %esp
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: popl %edi
|
||||
; CHECK-NEXT: popl %ebx
|
||||
; CHECK-NEXT: popl %ebp
|
||||
; CHECK-NEXT: retl $4
|
||||
; CHECK-NEXT: LBB0_25: ## %lpad.loopexit.split-lp
|
||||
; CHECK-NEXT: LBB0_23: ## %lpad.loopexit.split-lp
|
||||
; CHECK-NEXT: Ltmp15: ## EH_LABEL
|
||||
; CHECK-NEXT: jmp LBB0_6
|
||||
; CHECK-NEXT: LBB0_26: ## %lpad8185
|
||||
; CHECK-NEXT: jmp LBB0_25
|
||||
; CHECK-NEXT: LBB0_24: ## %lpad8185
|
||||
; CHECK-NEXT: Ltmp12: ## EH_LABEL
|
||||
; CHECK-NEXT: jmp LBB0_6
|
||||
; CHECK-NEXT: LBB0_24: ## %lpad.loopexit
|
||||
; CHECK-NEXT: jmp LBB0_25
|
||||
; CHECK-NEXT: LBB0_22: ## %lpad.loopexit
|
||||
; CHECK-NEXT: Ltmp18: ## EH_LABEL
|
||||
; CHECK-NEXT: jmp LBB0_6
|
||||
; CHECK-NEXT: jmp LBB0_25
|
||||
; CHECK-NEXT: Lfunc_end0:
|
||||
entry:
|
||||
br i1 %foo, label %bb116.i, label %bb115.critedge.i
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user