Compare commits: main...users/pier
4 commits: 6cd7c41646, 3c6b5f75a5, 9cdf588d22, d05704bce4
@@ -268,6 +268,7 @@ public:
     CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
                        // operations; used by X86.
     Expand, // Generic expansion in terms of other atomic operations.
+    CustomExpand, // Custom target-specific expansion using TLI hooks.
     // Rewrite to a non-atomic form for use in a known non-preemptible
     // environment.
@@ -2273,6 +2274,18 @@ public:
         "Generic atomicrmw expansion unimplemented on this target");
   }
 
+  /// Perform an atomic store using a target-specific way.
+  virtual void emitExpandAtomicStore(StoreInst *SI) const {
+    llvm_unreachable(
+        "Generic atomic store expansion unimplemented on this target");
+  }
+
+  /// Perform an atomic load using a target-specific way.
+  virtual void emitExpandAtomicLoad(LoadInst *LI) const {
+    llvm_unreachable(
+        "Generic atomic load expansion unimplemented on this target");
+  }
+
   /// Perform a cmpxchg expansion using a target-specific method.
   virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
     llvm_unreachable("Generic cmpxchg expansion unimplemented on this target");
@@ -2377,8 +2390,8 @@ public:
   }
 
   /// Returns how the given (atomic) store should be expanded by the IR-level
-  /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try
-  /// to use an atomicrmw xchg.
+  /// AtomicExpand pass into. For instance AtomicExpansionKind::CustomExpand
+  /// will try to use an atomicrmw xchg.
   virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
     return AtomicExpansionKind::None;
   }
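Aside, to illustrate the contract these TargetLowering.h changes set up (a hedged sketch; MyTargetLowering and its TargetMachine wiring are hypothetical and not part of this patch): a backend returns CustomExpand from a shouldExpandAtomic*InIR query and performs its own IR rewrite in the matching emitExpandAtomic* hook.

// Hypothetical target, for illustration only; not part of this change.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {}

  // Ask the AtomicExpand pass to hand atomic stores to the hook below.
  AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override {
    return AtomicExpansionKind::CustomExpand;
  }

  // The pass does nothing further with SI once this hook returns, so the
  // rewrite (e.g. the scratch->flat conversion in the AMDGPU changes below)
  // must leave the IR in a legal state.
  void emitExpandAtomicStore(StoreInst *SI) const override {
    // Target-specific IR rewrite of SI goes here.
  }
};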
@@ -84,7 +84,7 @@ private:
   bool expandAtomicLoadToCmpXchg(LoadInst *LI);
   StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
   bool tryExpandAtomicStore(StoreInst *SI);
-  void expandAtomicStore(StoreInst *SI);
+  void expandAtomicStoreToXChg(StoreInst *SI);
   bool tryExpandAtomicRMW(AtomicRMWInst *AI);
   AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
   Value *
@@ -537,6 +537,9 @@ bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
     LI->setAtomic(AtomicOrdering::NotAtomic);
     return true;
+  case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
+    TLI->emitExpandAtomicLoad(LI);
+    return true;
   default:
     llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
   }
@@ -546,8 +549,11 @@ bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
   switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
+  case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
+    TLI->emitExpandAtomicStore(SI);
+    return true;
   case TargetLoweringBase::AtomicExpansionKind::Expand:
-    expandAtomicStore(SI);
+    expandAtomicStoreToXChg(SI);
     return true;
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
     SI->setAtomic(AtomicOrdering::NotAtomic);
@@ -620,7 +626,7 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
   return NewSI;
 }
 
-void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
+void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
   // This function is only called on atomic stores that are too large to be
   // atomic if implemented as a native store. So we replace them by an
   // atomic swap, that can be implemented for example as a ldrex/strex on ARM
@@ -741,7 +747,7 @@ bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
   }
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
     return lowerAtomicRMWInst(AI);
-  case TargetLoweringBase::AtomicExpansionKind::Expand:
+  case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
     TLI->emitExpandAtomicRMW(AI);
     return true;
   default:
@@ -1695,7 +1701,7 @@ bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
     return true;
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
     return lowerAtomicCmpXchgInst(CI);
-  case TargetLoweringBase::AtomicExpansionKind::Expand: {
+  case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
     TLI->emitExpandAtomicCmpXchg(CI);
     return true;
   }
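For contrast with the CustomExpand hook path wired up above, the renamed Expand path for stores keeps the generic lowering to an exchange whose result is unused. A minimal sketch of that idea follows; it is not the pass's actual expandAtomicStoreToXChg code, which (roughly) also adjusts unordered orderings to monotonic and feeds the new atomicrmw back into tryExpandAtomicRMW.

// Sketch only: replace an oversized `store atomic %v, %p` with an xchg whose
// result is dropped.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void expandStoreToXchgSketch(StoreInst *SI) {
  IRBuilder<> Builder(SI);
  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                          SI->getValueOperand(), SI->getAlign(),
                          SI->getOrdering());
  SI->eraseFromParent();
}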
@@ -17808,11 +17808,19 @@ static bool flatInstrMayAccessPrivate(const Instruction *I) {
          !AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS);
 }
 
+static TargetLowering::AtomicExpansionKind
+getPrivateAtomicExpansionKind(const GCNSubtarget &STI) {
+  // For GAS, lower to flat atomic.
+  return STI.hasGloballyAddressableScratch()
+             ? TargetLowering::AtomicExpansionKind::Expand
+             : TargetLowering::AtomicExpansionKind::NotAtomic;
+}
+
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   unsigned AS = RMW->getPointerAddressSpace();
   if (AS == AMDGPUAS::PRIVATE_ADDRESS)
-    return AtomicExpansionKind::NotAtomic;
+    return getPrivateAtomicExpansionKind(*getSubtarget());
 
   // 64-bit flat atomics that dynamically reside in private memory will silently
   // be dropped.
@@ -17823,7 +17831,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   if (AS == AMDGPUAS::FLAT_ADDRESS &&
       DL.getTypeSizeInBits(RMW->getType()) == 64 &&
       flatInstrMayAccessPrivate(RMW))
-    return AtomicExpansionKind::Expand;
+    return AtomicExpansionKind::CustomExpand;
 
   auto ReportUnsafeHWInst = [=](TargetLowering::AtomicExpansionKind Kind) {
     OptimizationRemarkEmitter ORE(RMW->getFunction());
@@ -17898,7 +17906,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
     // does. InstCombine transforms these with 0 to or, so undo that.
     if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
         ConstVal && ConstVal->isNullValue())
-      return AtomicExpansionKind::Expand;
+      return AtomicExpansionKind::CustomExpand;
   }
 
   // If the allocation could be in remote, fine-grained memory, the rmw
@@ -18027,9 +18035,9 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
       // fadd.
       if (Subtarget->hasLDSFPAtomicAddF32()) {
         if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
-          return AtomicExpansionKind::Expand;
+          return AtomicExpansionKind::CustomExpand;
         if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
-          return AtomicExpansionKind::Expand;
+          return AtomicExpansionKind::CustomExpand;
       }
     }
   }
@@ -18083,14 +18091,14 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
-             ? AtomicExpansionKind::NotAtomic
+             ? getPrivateAtomicExpansionKind(*getSubtarget())
              : AtomicExpansionKind::None;
 }
 
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
-             ? AtomicExpansionKind::NotAtomic
+             ? getPrivateAtomicExpansionKind(*getSubtarget())
              : AtomicExpansionKind::None;
 }
 
@@ -18098,7 +18106,7 @@ TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
   unsigned AddrSpace = CmpX->getPointerAddressSpace();
   if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
-    return AtomicExpansionKind::NotAtomic;
+    return getPrivateAtomicExpansionKind(*getSubtarget());
 
   if (AddrSpace != AMDGPUAS::FLAT_ADDRESS || !flatInstrMayAccessPrivate(CmpX))
     return AtomicExpansionKind::None;
@@ -18109,7 +18117,7 @@ SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
 
   // If a 64-bit flat atomic may alias private, we need to avoid using the
   // atomic in the private case.
-  return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::Expand
+  return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::CustomExpand
                                            : AtomicExpansionKind::None;
 }
 
@@ -18468,9 +18476,24 @@ void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
   Builder.CreateBr(ExitBB);
 }
 
+static void convertScratchAtomicToFlatAtomic(Instruction *I,
+                                             unsigned PtrOpIdx) {
+  Value *PtrOp = I->getOperand(PtrOpIdx);
+  assert(PtrOp->getType()->getPointerAddressSpace() ==
+         AMDGPUAS::PRIVATE_ADDRESS);
+
+  Type *FlatPtr = PointerType::get(I->getContext(), AMDGPUAS::FLAT_ADDRESS);
+  Value *ASCast = CastInst::CreatePointerCast(PtrOp, FlatPtr, "scratch.ascast",
+                                              I->getIterator());
+  I->setOperand(PtrOpIdx, ASCast);
+}
+
 void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
   AtomicRMWInst::BinOp Op = AI->getOperation();
 
+  if (AI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(AI, AI->getPointerOperandIndex());
+
   if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
       Op == AtomicRMWInst::Xor) {
     if (const auto *ConstVal = dyn_cast<Constant>(AI->getValOperand());
@@ -18493,9 +18516,28 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
 }
 
 void SITargetLowering::emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
+  if (CI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(CI, CI->getPointerOperandIndex());
+
   emitExpandAtomicAddrSpacePredicate(CI);
 }
 
+void SITargetLowering::emitExpandAtomicLoad(LoadInst *LI) const {
+  if (LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(LI, LI->getPointerOperandIndex());
+
+  llvm_unreachable(
+      "Expand Atomic Load only handles SCRATCH -> FLAT conversion");
+}
+
+void SITargetLowering::emitExpandAtomicStore(StoreInst *SI) const {
+  if (SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(SI, SI->getPointerOperandIndex());
+
+  llvm_unreachable(
+      "Expand Atomic Store only handles SCRATCH -> FLAT conversion");
+}
+
 LoadInst *
 SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   IRBuilder<> Builder(AI);
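To make the effect of these hooks concrete: on subtargets with globally addressable scratch, the scratch atomic stays atomic and only its pointer operand is rewritten into the flat address space. Below is a standalone sketch of the same rewrite specialized to loads; it is an illustration only, convertScratchAtomicToFlatAtomic above is the authoritative helper, and the IR shown in the comments matches the GFX1250 CHECK lines in the new test further down.

// Sketch only. Effect on IR:
//   before: %v = load atomic i32, ptr addrspace(5) %p acquire, align 4
//   after:  %scratch.ascast = addrspacecast ptr addrspace(5) %p to ptr
//           %v = load atomic i32, ptr %scratch.ascast acquire, align 4
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include <cassert>
using namespace llvm;

static void rewriteScratchAtomicLoadToFlat(LoadInst *LI) {
  assert(LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
  Type *FlatPtrTy = PointerType::get(LI->getContext(), AMDGPUAS::FLAT_ADDRESS);
  Value *Cast = CastInst::CreatePointerCast(LI->getPointerOperand(), FlatPtrTy,
                                            "scratch.ascast", LI->getIterator());
  LI->setOperand(LI->getPointerOperandIndex(), Cast);
}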
@@ -562,6 +562,8 @@ public:
   void emitExpandAtomicAddrSpacePredicate(Instruction *AI) const;
   void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
   void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const override;
+  void emitExpandAtomicLoad(LoadInst *LI) const override;
+  void emitExpandAtomicStore(StoreInst *SI) const override;
 
   LoadInst *
   lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
@@ -7893,7 +7893,7 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
                     AI->getOperation() == AtomicRMWInst::Or ||
                     AI->getOperation() == AtomicRMWInst::Xor))
-    return AtomicExpansionKind::Expand;
+    return AtomicExpansionKind::CustomExpand;
   if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
     return AtomicExpansionKind::CmpXChg;
 }
@@ -86,15 +86,3 @@ entry:
   store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4
   ret void
 }
-
-; GCN: scratch_atomic_store:
-; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; GCN: .amdhsa_kernel scratch_atomic_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @scratch_atomic_store(ptr addrspace(5) %dst, i32 %val) {
-entry:
-  store atomic i32 %val, ptr addrspace(5) %dst syncscope("wavefront") unordered, align 4
-  ret void
-}
New test files (diff suppressed because it is too large):
23707  llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll
23605  llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll
22648  llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll
23605  llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll
23651  llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll
@@ -0,0 +1,172 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1200 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1250 %s

define void @system_atomic_store_unordered_float(ptr addrspace(5) %addr, float %val) {
; GFX1200-LABEL: define void @system_atomic_store_unordered_float(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX1200-NEXT:    store float [[VAL]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret void
;
; GFX1250-LABEL: define void @system_atomic_store_unordered_float(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    store atomic float [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
; GFX1250-NEXT:    ret void
;
  store atomic float %val, ptr addrspace(5) %addr unordered, align 4
  ret void
}

define void @system_atomic_store_unordered_i32(ptr addrspace(5) %addr, i32 %val) {
; GFX1200-LABEL: define void @system_atomic_store_unordered_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret void
;
; GFX1250-LABEL: define void @system_atomic_store_unordered_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
; GFX1250-NEXT:    ret void
;
  store atomic i32 %val, ptr addrspace(5) %addr unordered, align 4
  ret void
}

define void @system_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
; GFX1200-LABEL: define void @system_atomic_store_release_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret void
;
; GFX1250-LABEL: define void @system_atomic_store_release_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] release, align 4
; GFX1250-NEXT:    ret void
;
  store atomic i32 %val, ptr addrspace(5) %addr release, align 4
  ret void
}

define void @workgroup_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
; GFX1200-LABEL: define void @workgroup_atomic_store_release_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret void
;
; GFX1250-LABEL: define void @workgroup_atomic_store_release_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] syncscope("workgroup") release, align 4
; GFX1250-NEXT:    ret void
;
  store atomic i32 %val, ptr addrspace(5) %addr syncscope("workgroup") release, align 4
  ret void
}

define float @system_atomic_load_unordered_float(ptr addrspace(5) %addr) {
; GFX1200-LABEL: define float @system_atomic_load_unordered_float(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    [[VAL:%.*]] = load float, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret float [[VAL]]
;
; GFX1250-LABEL: define float @system_atomic_load_unordered_float(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    [[VAL:%.*]] = load atomic float, ptr [[SCRATCH_ASCAST]] unordered, align 4
; GFX1250-NEXT:    ret float [[VAL]]
;
  %val = load atomic float, ptr addrspace(5) %addr unordered, align 4
  ret float %val
}

define i32 @system_atomic_load_unordered_i32(ptr addrspace(5) %addr) {
; GFX1200-LABEL: define i32 @system_atomic_load_unordered_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret i32 [[VAL]]
;
; GFX1250-LABEL: define i32 @system_atomic_load_unordered_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] unordered, align 4
; GFX1250-NEXT:    ret i32 [[VAL]]
;
  %val = load atomic i32, ptr addrspace(5) %addr unordered, align 4
  ret i32 %val
}

define i32 @system_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
; GFX1200-LABEL: define i32 @system_atomic_load_acquire_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret i32 [[VAL]]
;
; GFX1250-LABEL: define i32 @system_atomic_load_acquire_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] acquire, align 4
; GFX1250-NEXT:    ret i32 [[VAL]]
;
  %val = load atomic i32, ptr addrspace(5) %addr acquire, align 4
  ret i32 %val
}

define i32 @workgroup_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
; GFX1200-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret i32 [[VAL]]
;
; GFX1250-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] syncscope("workgroup") acquire, align 4
; GFX1250-NEXT:    ret i32 [[VAL]]
;
  %val = load atomic i32, ptr addrspace(5) %addr syncscope("workgroup") acquire, align 4
  ret i32 %val
}

define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(ptr addrspace(5) %addr, i32 %old, i32 %in) {
; GFX1200-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[OLD]]
; GFX1200-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[IN]], i32 [[TMP1]]
; GFX1200-NEXT:    store i32 [[TMP3]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
; GFX1200-NEXT:    [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
; GFX1200-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX1200-NEXT:    ret i32 [[RES]]
;
; GFX1250-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    [[VAL:%.*]] = cmpxchg volatile ptr [[SCRATCH_ASCAST]], i32 [[OLD]], i32 [[IN]] acq_rel acquire, align 4
; GFX1250-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
; GFX1250-NEXT:    ret i32 [[RES]]
;
  %val = cmpxchg volatile ptr addrspace(5) %addr, i32 %old, i32 %in acq_rel acquire
  %res = extractvalue { i32, i1 } %val, 0
  ret i32 %res
}

define i32 @system_atomicrmw_add_acq_rel_i32(ptr addrspace(5) %addr, i32 %in) {
; GFX1200-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    store i32 [[IN]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT:    ret i32 [[TMP1]]
;
; GFX1250-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT:    [[VAL:%.*]] = atomicrmw volatile xchg ptr [[SCRATCH_ASCAST]], i32 [[IN]] acq_rel, align 4
; GFX1250-NEXT:    ret i32 [[VAL]]
;
  %val = atomicrmw volatile xchg ptr addrspace(5) %addr, i32 %in acq_rel
  ret i32 %val
}