[AMDGPU][gfx1250] Use SCOPE_SE for stores that may hit scratch (#150586)
This commit is contained in:
parent
d4f9c11e06
commit
2ad4e93ded
@ -552,7 +552,7 @@ public:
|
||||
(!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(Inst))) {
|
||||
// FLAT and SCRATCH instructions may access scratch. Other VMEM
|
||||
// instructions do not.
|
||||
if (SIInstrInfo::isFLAT(Inst) && mayAccessScratchThroughFlat(Inst))
|
||||
if (TII->mayAccessScratchThroughFlat(Inst))
|
||||
return SCRATCH_WRITE_ACCESS;
|
||||
return VMEM_WRITE_ACCESS;
|
||||
}
|
||||
@ -565,7 +565,6 @@ public:
|
||||
|
||||
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;
|
||||
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
|
||||
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const;
|
||||
bool isVmemAccess(const MachineInstr &MI) const;
|
||||
bool generateWaitcntInstBefore(MachineInstr &MI,
|
||||
WaitcntBrackets &ScoreBrackets,
|
||||
@ -2160,32 +2159,6 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// This is a flat memory operation. Check to see if it has memory tokens for
|
||||
// either scratch or FLAT.
|
||||
bool SIInsertWaitcnts::mayAccessScratchThroughFlat(
|
||||
const MachineInstr &MI) const {
|
||||
assert(TII->isFLAT(MI));
|
||||
|
||||
// SCRATCH instructions always access scratch.
|
||||
if (TII->isFLATScratch(MI))
|
||||
return true;
|
||||
|
||||
// GLOBAL instructions never access scratch.
|
||||
if (TII->isFLATGlobal(MI))
|
||||
return false;
|
||||
|
||||
// If there are no memory operands then conservatively assume the flat
|
||||
// operation may access scratch.
|
||||
if (MI.memoperands_empty())
|
||||
return true;
|
||||
|
||||
// See if any memory operand specifies an address space that involves scratch.
|
||||
return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) {
|
||||
unsigned AS = Memop->getAddrSpace();
|
||||
return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
|
||||
});
|
||||
}
|
||||
|
||||
bool SIInsertWaitcnts::isVmemAccess(const MachineInstr &MI) const {
|
||||
return (TII->isFLAT(MI) && mayAccessVMEMThroughFlat(MI)) ||
|
||||
(TII->isVMEM(MI) && !AMDGPU::getMUBUFIsBufferInv(MI.getOpcode()));
|
||||
|
||||
@ -4249,6 +4249,32 @@ bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
|
||||
Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
|
||||
}
|
||||
|
||||
bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const {
|
||||
if (!isFLAT(MI) || isFLATGlobal(MI))
|
||||
return false;
|
||||
|
||||
// If scratch is not initialized, we can never access it.
|
||||
if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))
|
||||
return false;
|
||||
|
||||
// SCRATCH instructions always access scratch.
|
||||
if (isFLATScratch(MI))
|
||||
return true;
|
||||
|
||||
// If there are no memory operands then conservatively assume the flat
|
||||
// operation may access scratch.
|
||||
if (MI.memoperands_empty())
|
||||
return true;
|
||||
|
||||
// TODO (?): Does this need to be taught how to read noalias.addrspace ?
|
||||
|
||||
// See if any memory operand specifies an address space that involves scratch.
|
||||
return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) {
|
||||
unsigned AS = Memop->getAddrSpace();
|
||||
return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
|
||||
});
|
||||
}
|
||||
|
||||
bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
|
||||
// Skip the full operand and register alias search modifiesRegister
|
||||
// does. There's only a handful of instructions that touch this, it's only an
|
||||
|
||||
@ -678,6 +678,12 @@ public:
|
||||
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
|
||||
}
|
||||
|
||||
/// Query whether \p MI may access scratch.
/// \returns true for SCRATCH_ instructions, or FLAT_ instructions whose
|
||||
/// memory operands are in the private or flat address space (a FLAT_
/// instruction without any memory operands is also assumed to qualify).
/// Instructions that are not FLAT-encoded, and GLOBAL_ instructions, yield
/// false.
|
||||
/// Conservatively correct; will return true if \p MI cannot be proven
|
||||
/// to not hit scratch.
/// In a function carrying the "amdgpu-no-flat-scratch-init" attribute, FLAT_
/// instructions are treated as unable to reach scratch.
|
||||
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const;
|
||||
|
||||
static bool isBlockLoadStore(uint16_t Opcode) {
|
||||
switch (Opcode) {
|
||||
case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
|
||||
|
||||
@ -321,7 +321,8 @@ public:
|
||||
bool IsNonTemporal,
|
||||
bool IsLastUse = false) const = 0;
|
||||
|
||||
virtual bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const {
|
||||
virtual bool finalizeStore(MachineBasicBlock::iterator &MI,
|
||||
bool Atomic) const {
|
||||
return false;
|
||||
};
|
||||
|
||||
@ -602,7 +603,8 @@ public:
|
||||
bool IsVolatile, bool IsNonTemporal,
|
||||
bool IsLastUse) const override;
|
||||
|
||||
bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;
|
||||
bool finalizeStore(MachineBasicBlock::iterator &MI,
|
||||
bool Atomic) const override;
|
||||
|
||||
bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
|
||||
@ -2551,11 +2553,25 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool SIGfx12CacheControl::expandSystemScopeStore(
|
||||
MachineBasicBlock::iterator &MI) const {
|
||||
bool SIGfx12CacheControl::finalizeStore(MachineBasicBlock::iterator &MI,
|
||||
bool Atomic) const {
|
||||
MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
|
||||
if (CPol && ((CPol->getImm() & CPol::SCOPE) == CPol::SCOPE_SYS))
|
||||
return insertWaitsBeforeSystemScopeStore(MI);
|
||||
if (!CPol)
|
||||
return false;
|
||||
|
||||
const unsigned Scope = CPol->getImm() & CPol::SCOPE;
|
||||
|
||||
// GFX12.0 only: Extra waits needed before system scope stores.
|
||||
if (!ST.hasGFX1250Insts()) {
|
||||
if (!Atomic && Scope == CPol::SCOPE_SYS)
|
||||
return insertWaitsBeforeSystemScopeStore(MI);
|
||||
return false;
|
||||
}
|
||||
|
||||
// GFX12.5 only: Require SCOPE_SE on stores that may hit the scratch address
|
||||
// space.
|
||||
if (TII->mayAccessScratchThroughFlat(*MI) && Scope == CPol::SCOPE_CU)
|
||||
return setScope(MI, CPol::SCOPE_SE);
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -2674,6 +2690,7 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
Position::BEFORE);
|
||||
|
||||
Changed |= CC->finalizeStore(MI, /*Atomic=*/true);
|
||||
return Changed;
|
||||
}
|
||||
|
||||
@ -2686,7 +2703,7 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
|
||||
|
||||
// GFX12 specific, scope(desired coherence domain in cache hierarchy) is
|
||||
// instruction field, do not confuse it with atomic scope.
|
||||
Changed |= CC->expandSystemScopeStore(MI);
|
||||
Changed |= CC->finalizeStore(MI, /*Atomic=*/false);
|
||||
return Changed;
|
||||
}
|
||||
|
||||
|
||||
95
llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
Normal file
95
llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s
|
||||
|
||||
; Test that stores that may hit scratch are correctly promoted to SCOPE_SE.
|
||||
|
||||
; A store through an addrspace(5) pointer is emitted as scratch_store and is
; expected to carry scope:SCOPE_SE.
define void @test_scratch_store(ptr addrspace(5) %ptr, i32 %val) {
|
||||
; GCN-LABEL: test_scratch_store:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-NEXT: scratch_store_b32 v0, v1, off scope:SCOPE_SE
|
||||
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||
store i32 %val, ptr addrspace(5) %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; A store through a generic pointer of unknown origin is emitted as flat_store
; and is expected to carry scope:SCOPE_SE, since it may hit scratch.
define void @test_unknown_flat_store(ptr %ptr, i32 %val) {
|
||||
; GCN-LABEL: test_unknown_flat_store:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
|
||||
; GCN-NEXT: s_wait_dscnt 0x0
|
||||
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||
store i32 %val, ptr %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Same generic store, but the function carries attribute group #0
; ("amdgpu-no-flat-scratch-init"): the flat_store is expected to be emitted
; without a scope modifier.
define void @test_flat_store_no_scratch_alloc(ptr %ptr, i32 %val) #0 {
|
||||
; GCN-LABEL: test_flat_store_no_scratch_alloc:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GCN-NEXT: s_wait_dscnt 0x0
|
||||
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||
store i32 %val, ptr %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
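; TODO: Handle !noalias.addrspace. The metadata on this store excludes address
; space 5, so the store cannot hit scratch, yet it is still given SCOPE_SE.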
|
||||
define void @test_flat_store_noalias_addrspace(ptr %ptr, i32 %val) {
|
||||
; GCN-LABEL: test_flat_store_noalias_addrspace:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
|
||||
; GCN-NEXT: s_wait_dscnt 0x0
|
||||
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||
store i32 %val, ptr %ptr, !noalias.addrspace !{i32 5, i32 6}
|
||||
ret void
|
||||
}
|
||||
|
||||
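; TODO: It would be nice to handle this case too. The pointer is a select
; between casts from addrspace(1) and addrspace(3), so the store cannot hit
; scratch, yet it is still given SCOPE_SE.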
|
||||
define void @test_flat_store_select(ptr addrspace(1) %a, ptr addrspace(3) %b, i1 %cond, i32 %val) {
|
||||
; GCN-SDAG-LABEL: test_flat_store_select:
|
||||
; GCN-SDAG: ; %bb.0:
|
||||
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v2
|
||||
; GCN-SDAG-NEXT: v_and_b32_e32 v3, 1, v3
|
||||
; GCN-SDAG-NEXT: s_mov_b64 s[0:1], src_shared_base
|
||||
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo
|
||||
; GCN-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, s1, vcc_lo
|
||||
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GCN-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
|
||||
; GCN-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v2, v0
|
||||
; GCN-SDAG-NEXT: flat_store_b32 v[0:1], v4 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: s_wait_dscnt 0x0
|
||||
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
|
||||
;
|
||||
; GCN-GISEL-LABEL: test_flat_store_select:
|
||||
; GCN-GISEL: ; %bb.0:
|
||||
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v2
|
||||
; GCN-GISEL-NEXT: v_and_b32_e32 v3, 1, v3
|
||||
; GCN-GISEL-NEXT: s_mov_b64 s[0:1], src_shared_base
|
||||
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo
|
||||
; GCN-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, s1, vcc_lo
|
||||
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GCN-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
|
||||
; GCN-GISEL-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v5, v1
|
||||
; GCN-GISEL-NEXT: flat_store_b32 v[0:1], v4 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: s_wait_dscnt 0x0
|
||||
; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
|
||||
%a.ascast = addrspacecast ptr addrspace(1) %a to ptr
|
||||
%b.ascast = addrspacecast ptr addrspace(3) %b to ptr
|
||||
%ptr = select i1 %cond, ptr %a.ascast, ptr %b.ascast
|
||||
store i32 %val, ptr %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-no-flat-scratch-init" }
|
||||
@ -124,27 +124,27 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
|
||||
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-SDAG-NEXT: s_clause 0xd
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:52
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:48
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:44
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 offset:40
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v44, s32 offset:36
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v45, s32 offset:32
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v56, s32 offset:28
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v57, s32 offset:24
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v58, s32 offset:20
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v59, s32 offset:16
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v60, s32 offset:12
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v61, s32 offset:8
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v62, s32 offset:4
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v63, s32
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:52 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:48 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:44 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 offset:40 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v44, s32 offset:36 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v45, s32 offset:32 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v56, s32 offset:28 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v57, s32 offset:24 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v58, s32 offset:20 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v59, s32 offset:16 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v60, s32 offset:12 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v61, s32 offset:8 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v62, s32 offset:4 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v63, s32 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:224
|
||||
; GCN-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
|
||||
; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:56 ; 16-byte Folded Spill
|
||||
; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:56 scope:SCOPE_SE ; 16-byte Folded Spill
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:240
|
||||
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
|
||||
; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:72 ; 16-byte Folded Spill
|
||||
; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:72 scope:SCOPE_SE ; 16-byte Folded Spill
|
||||
; GCN-SDAG-NEXT: s_clause 0xd
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:192
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[14:17], v[0:1], off offset:208
|
||||
@ -206,27 +206,27 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
|
||||
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-GISEL-NEXT: s_clause 0xf
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:60
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:56
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:52
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:48
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:44
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32 offset:40
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v46, s32 offset:36
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v47, s32 offset:32
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v56, s32 offset:28
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v57, s32 offset:24
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v58, s32 offset:20
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v59, s32 offset:16
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v60, s32 offset:12
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v61, s32 offset:8
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v62, s32 offset:4
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v63, s32
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:60 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:56 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:52 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:48 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:44 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32 offset:40 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v46, s32 offset:36 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v47, s32 offset:32 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v56, s32 offset:28 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v57, s32 offset:24 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v58, s32 offset:20 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v59, s32 offset:16 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v60, s32 offset:12 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v61, s32 offset:8 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v62, s32 offset:4 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v63, s32 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: s_wait_xcnt 0x8
|
||||
; GCN-GISEL-NEXT: v_dual_mov_b32 v46, v3 :: v_dual_mov_b32 v47, v4
|
||||
; GCN-GISEL-NEXT: global_load_b128 v[2:5], v[0:1], off offset:32
|
||||
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GCN-GISEL-NEXT: scratch_store_b128 off, v[2:5], s32 offset:80 ; 16-byte Folded Spill
|
||||
; GCN-GISEL-NEXT: scratch_store_b128 off, v[2:5], s32 offset:80 scope:SCOPE_SE ; 16-byte Folded Spill
|
||||
; GCN-GISEL-NEXT: s_clause 0xe
|
||||
; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:48
|
||||
; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off offset:64
|
||||
@ -244,7 +244,7 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
|
||||
; GCN-GISEL-NEXT: global_load_b128 v[60:63], v[0:1], off offset:16
|
||||
; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:240
|
||||
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GCN-GISEL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:64 ; 16-byte Folded Spill
|
||||
; GCN-GISEL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:64 scope:SCOPE_SE ; 16-byte Folded Spill
|
||||
; GCN-GISEL-NEXT: scratch_load_b128 v[0:3], off, s32 offset:80 th:TH_LOAD_LU ; 16-byte Folded Reload
|
||||
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GCN-GISEL-NEXT: s_clause 0xe
|
||||
@ -299,10 +299,10 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
|
||||
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-SDAG-NEXT: s_clause 0x3
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:12
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:8
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:4
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:12 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:8 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:4 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 scope:SCOPE_SE
|
||||
; GCN-SDAG-NEXT: s_clause 0x7
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:112
|
||||
; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:96
|
||||
@ -385,12 +385,12 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
|
||||
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-GISEL-NEXT: s_clause 0x5
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:20
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:16
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:12
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:8
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:4
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:20 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:16 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:12 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:8 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:4 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32 scope:SCOPE_SE
|
||||
; GCN-GISEL-NEXT: s_clause 0x7
|
||||
; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:80
|
||||
; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user