[AMDGPU] Add target feature for waits before system scope stores. NFC. (#164993)
This commit is contained in:
parent
74bb1f435f
commit
60f20ea465
@ -1466,6 +1466,13 @@ def FeatureClusters : SubtargetFeature< "clusters",
|
||||
"Has clusters of workgroups support"
|
||||
>;
|
||||
|
||||
def FeatureWaitsBeforeSystemScopeStores : SubtargetFeature<
|
||||
"waits-before-system-scope-stores",
|
||||
"RequiresWaitsBeforeSystemScopeStores",
|
||||
"true",
|
||||
"Target requires waits for loads and atomics before system scope stores"
|
||||
>;
|
||||
|
||||
// Dummy feature used to disable assembler instructions.
|
||||
def FeatureDisable : SubtargetFeature<"",
|
||||
"FeatureDisable","true",
|
||||
@ -2060,7 +2067,8 @@ def FeatureISAVersion12 : FeatureSet<
|
||||
FeatureMaxHardClauseLength32,
|
||||
Feature1_5xVGPRs,
|
||||
FeatureMemoryAtomicFAddF32DenormalSupport,
|
||||
FeatureBVHDualAndBVH8Insts
|
||||
FeatureBVHDualAndBVH8Insts,
|
||||
FeatureWaitsBeforeSystemScopeStores,
|
||||
]>;
|
||||
|
||||
def FeatureISAVersion12_50 : FeatureSet<
|
||||
|
||||
@ -290,6 +290,7 @@ protected:
|
||||
bool Has45BitNumRecordsBufferResource = false;
|
||||
|
||||
bool HasClusters = false;
|
||||
bool RequiresWaitsBeforeSystemScopeStores = false;
|
||||
|
||||
// Dummy feature to use for assembler in tablegen.
|
||||
bool FeatureDisable = false;
|
||||
@ -1861,6 +1862,10 @@ public:
|
||||
bool has45BitNumRecordsBufferResource() const {
|
||||
return Has45BitNumRecordsBufferResource;
|
||||
}
|
||||
|
||||
bool requiresWaitsBeforeSystemScopeStores() const {
|
||||
return RequiresWaitsBeforeSystemScopeStores;
|
||||
}
|
||||
};
|
||||
|
||||
class GCNUserSGPRUsageInfo {
|
||||
|
||||
@ -2673,7 +2673,8 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
|
||||
const unsigned Scope = CPol->getImm() & CPol::SCOPE;
|
||||
|
||||
// GFX12.0 only: Extra waits needed before system scope stores.
|
||||
if (!ST.hasGFX1250Insts() && !Atomic && Scope == CPol::SCOPE_SYS)
|
||||
if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
|
||||
Scope == CPol::SCOPE_SYS)
|
||||
Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());
|
||||
|
||||
return Changed;
|
||||
|
||||
@ -1,22 +1,50 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
|
||||
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
|
||||
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
|
||||
|
||||
define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX12-LABEL: intrinsic_store_system_scope:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
; GFX1200-LABEL: intrinsic_store_system_scope:
|
||||
; GFX1200: ; %bb.0:
|
||||
; GFX1200-NEXT: buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
|
||||
; GFX1200-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-LABEL: intrinsic_store_system_scope:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-SDAG-NEXT: buffer_store_b32 v0, v[2:3], s[0:3], s4 idxen offen scope:SCOPE_SYS
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-LABEL: intrinsic_store_system_scope:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GFX1250-GISEL-NEXT: buffer_store_b32 v0, v[4:5], s[0:3], s4 idxen offen scope:SCOPE_SYS
|
||||
; GFX1250-GISEL-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.buffer.store.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 24)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @generic_store_volatile(i32 %val, ptr addrspace(1) %out) {
|
||||
; GFX12-LABEL: generic_store_volatile:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
; GFX1200-LABEL: generic_store_volatile:
|
||||
; GFX1200: ; %bb.0:
|
||||
; GFX1200-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
|
||||
; GFX1200-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1200-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-SDAG-LABEL: generic_store_volatile:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-SDAG-NEXT: global_store_b32 v[2:3], v0, off scope:SCOPE_SYS
|
||||
; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-LABEL: generic_store_volatile:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GFX1250-GISEL-NEXT: global_store_b32 v[4:5], v0, off scope:SCOPE_SYS
|
||||
; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-GISEL-NEXT: s_endpgm
|
||||
store volatile i32 %val, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user