[AMDGPU] Expand scratch atomics to flat atomics if GAS is enabled
parent 3c6b5f75a5
commit 6cd7c41646
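Summary: on subtargets with globally addressable scratch (GAS), atomics on private (scratch) pointers are no longer demoted to non-atomic loads and stores. The shouldExpand*InIR hooks now report Expand instead of NotAtomic for them, and the expansion rewrites the instruction in place by addrspacecast'ing the scratch pointer to a flat pointer, so the operation stays atomic. A minimal before/after sketch, distilled from the GFX1250 checks in the new test at the bottom of this diff (the @example function name is illustrative):

; Input: an atomic store through a scratch (addrspace(5)) pointer.
define void @example(ptr addrspace(5) %p, i32 %v) {
  store atomic i32 %v, ptr addrspace(5) %p release, align 4
  ret void
}

; After -passes=atomic-expand on a GAS-capable target (e.g. gfx1250): the
; pointer is cast to flat and the store remains atomic.
define void @example(ptr addrspace(5) %p, i32 %v) {
  %scratch.ascast = addrspacecast ptr addrspace(5) %p to ptr
  store atomic i32 %v, ptr %scratch.ascast release, align 4
  ret void
}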
@@ -17808,11 +17808,19 @@ static bool flatInstrMayAccessPrivate(const Instruction *I) {
           !AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS);
 }
 
+static TargetLowering::AtomicExpansionKind
+getPrivateAtomicExpansionKind(const GCNSubtarget &STI) {
+  // For GAS, lower to flat atomic.
+  return STI.hasGloballyAddressableScratch()
+             ? TargetLowering::AtomicExpansionKind::Expand
+             : TargetLowering::AtomicExpansionKind::NotAtomic;
+}
+
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   unsigned AS = RMW->getPointerAddressSpace();
   if (AS == AMDGPUAS::PRIVATE_ADDRESS)
-    return AtomicExpansionKind::NotAtomic;
+    return getPrivateAtomicExpansionKind(*getSubtarget());
 
   // 64-bit flat atomics that dynamically reside in private memory will silently
   // be dropped.
@@ -18083,14 +18091,14 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
-             ? AtomicExpansionKind::NotAtomic
+             ? getPrivateAtomicExpansionKind(*getSubtarget())
              : AtomicExpansionKind::None;
 }
 
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
-             ? AtomicExpansionKind::NotAtomic
+             ? getPrivateAtomicExpansionKind(*getSubtarget())
              : AtomicExpansionKind::None;
 }
 
@@ -18098,7 +18106,7 @@ TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
   unsigned AddrSpace = CmpX->getPointerAddressSpace();
   if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
-    return AtomicExpansionKind::NotAtomic;
+    return getPrivateAtomicExpansionKind(*getSubtarget());
 
   if (AddrSpace != AMDGPUAS::FLAT_ADDRESS || !flatInstrMayAccessPrivate(CmpX))
     return AtomicExpansionKind::None;
@@ -18468,9 +18476,24 @@ void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
   Builder.CreateBr(ExitBB);
 }
 
+static void convertScratchAtomicToFlatAtomic(Instruction *I,
+                                             unsigned PtrOpIdx) {
+  Value *PtrOp = I->getOperand(PtrOpIdx);
+  assert(PtrOp->getType()->getPointerAddressSpace() ==
+         AMDGPUAS::PRIVATE_ADDRESS);
+
+  Type *FlatPtr = PointerType::get(I->getContext(), AMDGPUAS::FLAT_ADDRESS);
+  Value *ASCast = CastInst::CreatePointerCast(PtrOp, FlatPtr, "scratch.ascast",
+                                              I->getIterator());
+  I->setOperand(PtrOpIdx, ASCast);
+}
+
 void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
   AtomicRMWInst::BinOp Op = AI->getOperation();
 
+  if (AI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(AI, AI->getPointerOperandIndex());
+
   if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
       Op == AtomicRMWInst::Xor) {
     if (const auto *ConstVal = dyn_cast<Constant>(AI->getValOperand());
@@ -18493,9 +18516,28 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
 }
 
 void SITargetLowering::emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
+  if (CI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(CI, CI->getPointerOperandIndex());
+
   emitExpandAtomicAddrSpacePredicate(CI);
 }
 
+void SITargetLowering::emitExpandAtomicLoad(LoadInst *LI) const {
+  if (LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(LI, LI->getPointerOperandIndex());
+
+  llvm_unreachable(
+      "Expand Atomic Load only handles SCRATCH -> FLAT conversion");
+}
+
+void SITargetLowering::emitExpandAtomicStore(StoreInst *SI) const {
+  if (SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(SI, SI->getPointerOperandIndex());
+
+  llvm_unreachable(
+      "Expand Atomic Store only handles SCRATCH -> FLAT conversion");
+}
+
 LoadInst *
 SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   IRBuilder<> Builder(AI);
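Note that all four emitExpandAtomic* overrides above route scratch atomics through convertScratchAtomicToFlatAtomic, which only rewrites the pointer operand; the instruction's ordering, syncscope, and volatility are left untouched. The effect on an RMW, again distilled from the GFX1250 checks in the test below (value names are illustrative):

; Input:
;   %val = atomicrmw volatile xchg ptr addrspace(5) %addr, i32 %in acq_rel
; Output on a GAS-capable target:
;   %scratch.ascast = addrspacecast ptr addrspace(5) %addr to ptr
;   %val = atomicrmw volatile xchg ptr %scratch.ascast, i32 %in acq_rel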
@@ -562,6 +562,8 @@ public:
   void emitExpandAtomicAddrSpacePredicate(Instruction *AI) const;
   void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
   void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const override;
+  void emitExpandAtomicLoad(LoadInst *LI) const override;
+  void emitExpandAtomicStore(StoreInst *SI) const override;
 
   LoadInst *
   lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
@@ -86,15 +86,3 @@ entry:
   store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4
   ret void
 }
-
-; GCN: scratch_atomic_store:
-; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; GCN: .amdhsa_kernel scratch_atomic_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @scratch_atomic_store(ptr addrspace(5) %dst, i32 %val) {
-entry:
-  store atomic i32 %val, ptr addrspace(5) %dst syncscope("wavefront") unordered, align 4
-  ret void
-}
(5 file diffs suppressed because they are too large.)
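The new file below is an autogenerated atomic-expand test contrasting gfx1200, which lacks GAS and still demotes scratch atomics to plain loads and stores, with gfx1250, which casts the pointer to flat and keeps the operation atomic: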
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1250 %s
+
+define void @system_atomic_store_unordered_float(ptr addrspace(5) %addr, float %val) {
+; GFX1200-LABEL: define void @system_atomic_store_unordered_float(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX1200-NEXT:    store float [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_unordered_float(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic float [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic float %val, ptr addrspace(5) %addr unordered, align 4
+  ret void
+}
+
+define void @system_atomic_store_unordered_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @system_atomic_store_unordered_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_unordered_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr unordered, align 4
+  ret void
+}
+
+define void @system_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @system_atomic_store_release_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_release_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] release, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr release, align 4
+  ret void
+}
+
+define void @workgroup_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @workgroup_atomic_store_release_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @workgroup_atomic_store_release_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] syncscope("workgroup") release, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr syncscope("workgroup") release, align 4
+  ret void
+}
+
+define float @system_atomic_load_unordered_float(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define float @system_atomic_load_unordered_float(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load float, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret float [[VAL]]
+;
+; GFX1250-LABEL: define float @system_atomic_load_unordered_float(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic float, ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret float [[VAL]]
+;
+  %val = load atomic float, ptr addrspace(5) %addr unordered, align 4
+  ret float %val
+}
+
+define i32 @system_atomic_load_unordered_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @system_atomic_load_unordered_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_load_unordered_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr unordered, align 4
+  ret i32 %val
+}
+
+define i32 @system_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @system_atomic_load_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_load_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] acquire, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr acquire, align 4
+  ret i32 %val
+}
+
+define i32 @workgroup_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] syncscope("workgroup") acquire, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr syncscope("workgroup") acquire, align 4
+  ret i32 %val
+}
+
+define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(ptr addrspace(5) %addr, i32 %old, i32 %in) {
+; GFX1200-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[OLD]]
+; GFX1200-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[IN]], i32 [[TMP1]]
+; GFX1200-NEXT:    store i32 [[TMP3]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
+; GFX1200-NEXT:    [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
+; GFX1200-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; GFX1200-NEXT:    ret i32 [[RES]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = cmpxchg volatile ptr [[SCRATCH_ASCAST]], i32 [[OLD]], i32 [[IN]] acq_rel acquire, align 4
+; GFX1250-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
+; GFX1250-NEXT:    ret i32 [[RES]]
+;
+  %val = cmpxchg volatile ptr addrspace(5) %addr, i32 %old, i32 %in acq_rel acquire
+  %res = extractvalue { i32, i1 } %val, 0
+  ret i32 %res
+}
+
+define i32 @system_atomicrmw_add_acq_rel_i32(ptr addrspace(5) %addr, i32 %in) {
+; GFX1200-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    store i32 [[IN]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[TMP1]]
+;
+; GFX1250-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = atomicrmw volatile xchg ptr [[SCRATCH_ASCAST]], i32 [[IN]] acq_rel, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = atomicrmw volatile xchg ptr addrspace(5) %addr, i32 %in acq_rel
+  ret i32 %val
+}