[AMDGPU][GFX12.5] Add support for emitting memory operations with nv bit set (#179413)
- Add the `MOThreadPrivate` MachineMemOperand target flag (called `MONonVolatile` in the original description).
- Set nv=1 on memory operations on GFX12.5 if the operation accesses a constant address space, is an invariant load, or has the `MOThreadPrivate` flag set.
This commit is contained in:
parent
d64a609b2b
commit
b738491d2f
@ -9929,6 +9929,7 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
|
||||
{MONoClobber, "amdgpu-noclobber"},
|
||||
{MOLastUse, "amdgpu-last-use"},
|
||||
{MOCooperative, "amdgpu-cooperative"},
|
||||
{MOThreadPrivate, "amdgpu-thread-private"},
|
||||
};
|
||||
|
||||
return ArrayRef(TargetFlags);
|
||||
|
||||
@ -52,6 +52,11 @@ static const MachineMemOperand::Flags MOLastUse =
|
||||
static const MachineMemOperand::Flags MOCooperative =
|
||||
MachineMemOperand::MOTargetFlag3;
|
||||
|
||||
/// Mark the MMO of accesses to memory locations that are
|
||||
/// never written to by other threads.
/// This is target flag 4; it is serialized under the name
/// "amdgpu-thread-private". The memory legalizer treats an instruction whose
/// MMOs all carry this flag and/or MOInvariant as a non-volatile access.
|
||||
static const MachineMemOperand::Flags MOThreadPrivate =
|
||||
MachineMemOperand::MOTargetFlag4;
|
||||
|
||||
/// Utility to store machine instructions worklist.
|
||||
struct SIInstrWorklist {
|
||||
SIInstrWorklist() = default;
|
||||
|
||||
@ -398,6 +398,10 @@ public:
|
||||
bool IsCrossAddrSpaceOrdering,
|
||||
Position Pos) const = 0;
|
||||
|
||||
/// Handle operations that are considered non-volatile.
|
||||
/// See \ref isNonVolatileMemoryAccess
/// \returns true if \p MI was modified. This default implementation leaves
/// \p MI untouched and returns false; cache-control implementations that can
/// encode non-volatile accesses override it.
|
||||
virtual bool handleNonVolatile(MachineInstr &MI) const { return false; }
|
||||
|
||||
/// Virtual destructor to allow derivations to be deleted.
|
||||
virtual ~SICacheControl() = default;
|
||||
};
|
||||
@ -555,6 +559,8 @@ public:
|
||||
SIAtomicAddrSpace AddrSpace) const override {
|
||||
return setAtomicScope(MI, Scope, AddrSpace);
|
||||
}
|
||||
|
||||
/// Sets the NV cache-policy bit on \p MI when the subtarget has GFX1250
/// instructions and \p MI has a cpol operand. \returns true if the bit was
/// written, false otherwise.
bool handleNonVolatile(MachineInstr &MI) const override;
|
||||
};
|
||||
|
||||
class SIMemoryLegalizer final {
|
||||
@ -899,6 +905,18 @@ SIMemOpAccess::getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const {
|
||||
return constructFromMIWithMMO(MI);
|
||||
}
|
||||
|
||||
/// \returns true if \p MI has one or more MMO, and all of them are fit for
|
||||
/// being marked as non-volatile. This means that either they are accessing the
|
||||
/// constant address space, are accessing a known invariant memory location, or
|
||||
/// that they are marked with the non-volatile metadata/MMO flag.
|
||||
static bool isNonVolatileMemoryAccess(const MachineInstr &MI) {
|
||||
if (MI.getNumMemOperands() == 0)
|
||||
return false;
|
||||
return all_of(MI.memoperands(), [&](const MachineMemOperand *MMO) {
|
||||
return MMO->getFlags() & (MOThreadPrivate | MachineMemOperand::MOInvariant);
|
||||
});
|
||||
}
|
||||
|
||||
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
|
||||
TII = ST.getInstrInfo();
|
||||
IV = getIsaVersion(ST.getCPU());
|
||||
@ -2061,6 +2079,17 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// Mark \p MI as a non-volatile access by setting the NV bit of its
/// cache-policy operand.
///
/// Nothing is done unless the subtarget has GFX1250 instructions and \p MI
/// has a named cpol operand.
///
/// \returns true if the cpol operand was written, false otherwise.
bool SIGfx12CacheControl::handleNonVolatile(MachineInstr &MI) const {
  // Only GFX12.5 gets the NV CPol bit.
  if (ST.hasGFX1250Insts()) {
    // Instructions without a cache-policy operand cannot encode NV.
    if (MachineOperand *CachePolicy =
            TII->getNamedOperand(MI, OpName::cpol)) {
      const int64_t WithNV = CachePolicy->getImm() | AMDGPU::CPol::NV;
      CachePolicy->setImm(WithNV);
      return true;
    }
  }
  return false;
}
|
||||
|
||||
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
|
||||
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
|
||||
bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
|
||||
@ -2456,20 +2485,21 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) {
|
||||
MI = II->getIterator();
|
||||
}
|
||||
|
||||
if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
|
||||
continue;
|
||||
|
||||
if (const auto &MOI = MOA.getLoadInfo(MI)) {
|
||||
Changed |= expandLoad(*MOI, MI);
|
||||
} else if (const auto &MOI = MOA.getStoreInfo(MI)) {
|
||||
Changed |= expandStore(*MOI, MI);
|
||||
} else if (const auto &MOI = MOA.getLDSDMAInfo(MI)) {
|
||||
Changed |= expandLDSDMA(*MOI, MI);
|
||||
} else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) {
|
||||
Changed |= expandAtomicFence(*MOI, MI);
|
||||
} else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) {
|
||||
Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
|
||||
if (MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic) {
|
||||
if (const auto &MOI = MOA.getLoadInfo(MI))
|
||||
Changed |= expandLoad(*MOI, MI);
|
||||
else if (const auto &MOI = MOA.getStoreInfo(MI))
|
||||
Changed |= expandStore(*MOI, MI);
|
||||
else if (const auto &MOI = MOA.getLDSDMAInfo(MI))
|
||||
Changed |= expandLDSDMA(*MOI, MI);
|
||||
else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
|
||||
Changed |= expandAtomicFence(*MOI, MI);
|
||||
else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
|
||||
Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
|
||||
}
|
||||
|
||||
if (isNonVolatileMemoryAccess(*MI))
|
||||
Changed |= CC->handleNonVolatile(*MI);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
365
llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll
Normal file
365
llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll
Normal file
@ -0,0 +1,365 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU,GFX12-CU-DAGISEL %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU,GFX12-CU-GISEL %s
|
||||
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-DAGISEL %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GISEL %s
|
||||
|
||||
define void @flat_i32_nonatomic(ptr addrspace(0) %in, ptr addrspace(0) %out) {
|
||||
; GFX12-CU-LABEL: flat_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: flat_load_b32 v0, v[0:1]
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[2:3], v0
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: flat_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v[2:3], v0
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(0) %in
|
||||
store i32 %val, ptr addrspace(0) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @md_invariant__flat_i32_nonatomic(ptr addrspace(0) %in, ptr addrspace(0) %out) {
|
||||
; GFX12-CU-LABEL: md_invariant__flat_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: flat_load_b32 v0, v[0:1]
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: md_invariant__flat_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] nv
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(0) %in, !invariant.load !0
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define void @global_i32_nonatomic(ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
; GFX12-CU-LABEL: global_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: global_load_b32 v0, v[0:1], off
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v[2:3], v0, off
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: global_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v0, v[0:1], off
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v[2:3], v0, off
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(1) %in
|
||||
store i32 %val, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @md_invariant__global_i32_nonatomic(ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
; GFX12-CU-LABEL: md_invariant__global_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: global_load_b32 v0, v[0:1], off
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: md_invariant__global_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v0, v[0:1], off nv
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(1) %in, !invariant.load !0
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i32 @scalar_i32_nonatomic(ptr addrspace(4) inreg %in) {
|
||||
; GFX12-CU-LABEL: scalar_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: scalar_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(4) %in
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i32 @md_invariant__scalar_i32_nonatomic(ptr addrspace(4) inreg %in) {
|
||||
; GFX12-CU-LABEL: md_invariant__scalar_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: md_invariant__scalar_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(4) %in, !invariant.load !0
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define void @scratch_i32_nonatomic(ptr addrspace(5) %in, ptr addrspace(5) %out) {
|
||||
; GFX12-CU-LABEL: scratch_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: scratch_load_b32 v0, v0, off
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: scratch_store_b32 v1, v0, off
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: scratch_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: scratch_load_b32 v0, v0, off
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: scratch_store_b32 v1, v0, off
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(5) %in
|
||||
store i32 %val, ptr addrspace(5) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @md_invariant__scratch_i32_nonatomic(ptr addrspace(5) %in, ptr addrspace(5) %out) {
|
||||
; GFX12-CU-LABEL: md_invariant__scratch_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: scratch_load_b32 v0, v0, off
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: md_invariant__scratch_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: scratch_load_b32 v0, v0, off nv
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(5) %in, !invariant.load !0
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i32 @scalar32_i32_nonatomic(ptr addrspace(6) inreg %in) {
|
||||
; GFX12-CU-LABEL: scalar32_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: s_mov_b32 s1, 0
|
||||
; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: scalar32_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s1, 0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(6) %in
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i32 @md_invariant__scalar32_i32_nonatomic(ptr addrspace(6) inreg %in) {
|
||||
; GFX12-CU-LABEL: md_invariant__scalar32_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: s_mov_b32 s1, 0
|
||||
; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: md_invariant__scalar32_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s1, 0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 nv
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(6) %in, !invariant.load !0
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define void @buffer_i32_nonatomic(ptr addrspace(7) inreg %in, ptr addrspace(7) inreg %out) {
|
||||
; GFX12-CU-DAGISEL-LABEL: buffer_i32_nonatomic:
|
||||
; GFX12-CU-DAGISEL: ; %bb.0: ; %entry
|
||||
; GFX12-CU-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-DAGISEL-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-DAGISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-DAGISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-DAGISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-DAGISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21
|
||||
; GFX12-CU-DAGISEL-NEXT: s_mov_b32 s7, s20
|
||||
; GFX12-CU-DAGISEL-NEXT: s_mov_b32 s6, s19
|
||||
; GFX12-CU-DAGISEL-NEXT: s_mov_b32 s5, s18
|
||||
; GFX12-CU-DAGISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
|
||||
; GFX12-CU-DAGISEL-NEXT: s_mov_b32 s4, s17
|
||||
; GFX12-CU-DAGISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-DAGISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen
|
||||
; GFX12-CU-DAGISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-CU-GISEL-LABEL: buffer_i32_nonatomic:
|
||||
; GFX12-CU-GISEL: ; %bb.0: ; %entry
|
||||
; GFX12-CU-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-GISEL-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-GISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21
|
||||
; GFX12-CU-GISEL-NEXT: s_mov_b32 s4, s17
|
||||
; GFX12-CU-GISEL-NEXT: s_mov_b32 s5, s18
|
||||
; GFX12-CU-GISEL-NEXT: s_mov_b32 s6, s19
|
||||
; GFX12-CU-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
|
||||
; GFX12-CU-GISEL-NEXT: s_mov_b32 s7, s20
|
||||
; GFX12-CU-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen
|
||||
; GFX12-CU-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-DAGISEL-LABEL: buffer_i32_nonatomic:
|
||||
; GFX1250-DAGISEL: ; %bb.0: ; %entry
|
||||
; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21
|
||||
; GFX1250-DAGISEL-NEXT: s_mov_b32 s7, s20
|
||||
; GFX1250-DAGISEL-NEXT: s_mov_b32 s6, s19
|
||||
; GFX1250-DAGISEL-NEXT: s_mov_b32 s5, s18
|
||||
; GFX1250-DAGISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
|
||||
; GFX1250-DAGISEL-NEXT: s_mov_b32 s4, s17
|
||||
; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-DAGISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen
|
||||
; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
|
||||
;
|
||||
; GFX1250-GISEL-LABEL: buffer_i32_nonatomic:
|
||||
; GFX1250-GISEL: ; %bb.0: ; %entry
|
||||
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s4, s17
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s5, s18
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s6, s19
|
||||
; GFX1250-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
|
||||
; GFX1250-GISEL-NEXT: s_mov_b32 s7, s20
|
||||
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen
|
||||
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(7) %in
|
||||
store i32 %val, ptr addrspace(7) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @md_invariant__buffer_i32_nonatomic(ptr addrspace(7) inreg %in, ptr addrspace(7) inreg %out) {
|
||||
; GFX12-CU-LABEL: md_invariant__buffer_i32_nonatomic:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_expcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s16
|
||||
; GFX12-CU-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1250-LABEL: md_invariant__buffer_i32_nonatomic:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s16
|
||||
; GFX1250-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen nv
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(7) %in, !invariant.load !0
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
!0 = !{}
|
||||
@ -85,7 +85,7 @@ body: |
|
||||
; GFX1250-LABEL: name: promote_async_load_u64
|
||||
; GFX1250: liveins: $vgpr0, $sgpr4_sgpr5
|
||||
; GFX1250-NEXT: {{ $}}
|
||||
; GFX1250-NEXT: early-clobber renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM_ec killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; GFX1250-NEXT: early-clobber renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM_ec killed renamable $sgpr4_sgpr5, 36, 32 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; GFX1250-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX1250-NEXT: renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr0, implicit $exec
|
||||
; GFX1250-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B128_SADDR $vgpr1, $sgpr0_sgpr1, $vgpr0, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt :: (load store (s128), align 1, addrspace 3)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user