Compare commits

...

4 Commits

Author SHA1 Message Date
pvanhout
6cd7c41646 [AMDGPU] Expand scratch atomics to flat atomics if GAS is enabled 2025-08-22 12:45:16 +02:00
pvanhout
3c6b5f75a5 [AMDGPU] Precommit memory legalizer tests for private AS 2025-08-22 12:45:15 +02:00
pvanhout
9cdf588d22 Rename "Expand" to "ExpandCustom" 2025-08-22 12:44:48 +02:00
pvanhout
d05704bce4 [CodeGen][TLI] Allow targets to custom expand atomic load/stores
Loads didn't have the `Expand` option in `AtomicExpandPass`. Stores had `Expand`, but it didn't defer to TLI and instead performed the expansion directly in the pass.
Move the old behavior to an `XChg` expansion and make `Expand` behave like all other instructions.
2025-08-22 10:14:26 +02:00
12 changed files with 117468 additions and 29 deletions

View File

@@ -268,6 +268,7 @@ public:
CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
// operations; used by X86.
Expand, // Generic expansion in terms of other atomic operations.
CustomExpand, // Custom target-specific expansion using TLI hooks.
// Rewrite to a non-atomic form for use in a known non-preemptible
// environment.
@@ -2273,6 +2274,18 @@ public:
"Generic atomicrmw expansion unimplemented on this target");
}
/// Perform an atomic store expansion using a target-specific method.
virtual void emitExpandAtomicStore(StoreInst *SI) const {
llvm_unreachable(
"Generic atomic store expansion unimplemented on this target");
}
/// Perform an atomic load expansion using a target-specific method.
virtual void emitExpandAtomicLoad(LoadInst *LI) const {
llvm_unreachable(
"Generic atomic load expansion unimplemented on this target");
}
/// Perform a cmpxchg expansion using a target-specific method.
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
llvm_unreachable("Generic cmpxchg expansion unimplemented on this target");
@@ -2377,8 +2390,8 @@ public:
}
/// Returns how the given (atomic) store should be expanded by the IR-level
/// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try
/// to use an atomicrmw xchg.
/// AtomicExpand pass into. For instance AtomicExpansionKind::CustomExpand
/// will try to use an atomicrmw xchg.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return AtomicExpansionKind::None;
}
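
The hunks above add the `CustomExpand` kind and the per-instruction `emitExpandAtomic*` hooks behind it. As a rough illustration of how a backend is expected to wire these up, here is a minimal sketch for a hypothetical out-of-tree target; `MyTargetLowering` and the `kSpecialAS` address-space number are invented for the example and are not part of this patch.

// Sketch only: a hypothetical target using the reworked hooks.
class MyTargetLowering : public TargetLowering {
  // Placeholder address space whose atomic stores need a custom rewrite.
  static constexpr unsigned kSpecialAS = 5;

public:
  // Expand keeps the old generic behavior (AtomicExpandPass turns the store
  // into an atomicrmw xchg); CustomExpand now defers to emitExpandAtomicStore().
  AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override {
    return SI->getPointerAddressSpace() == kSpecialAS
               ? AtomicExpansionKind::CustomExpand
               : AtomicExpansionKind::None;
  }

  // Called by AtomicExpandPass for stores classified as CustomExpand. The
  // target rewrites the instruction in place, here by casting the pointer to
  // the generic address space, the same pattern the AMDGPU change below uses.
  void emitExpandAtomicStore(StoreInst *SI) const override {
    Type *GenericPtrTy = PointerType::get(SI->getContext(), /*AddressSpace=*/0);
    Value *Cast = CastInst::CreatePointerCast(SI->getPointerOperand(),
                                              GenericPtrTy, "generic.ascast",
                                              SI->getIterator());
    SI->setOperand(SI->getPointerOperandIndex(), Cast);
  }
};

The AtomicExpand.cpp changes in the next file are what route `CustomExpand` loads and stores to these hooks.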

View File

@@ -84,7 +84,7 @@ private:
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
bool tryExpandAtomicStore(StoreInst *SI);
void expandAtomicStore(StoreInst *SI);
void expandAtomicStoreToXChg(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
Value *
@@ -537,6 +537,9 @@ bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
LI->setAtomic(AtomicOrdering::NotAtomic);
return true;
case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
TLI->emitExpandAtomicLoad(LI);
return true;
default:
llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
@@ -546,8 +549,11 @@ bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
TLI->emitExpandAtomicStore(SI);
return true;
case TargetLoweringBase::AtomicExpansionKind::Expand:
expandAtomicStore(SI);
expandAtomicStoreToXChg(SI);
return true;
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
SI->setAtomic(AtomicOrdering::NotAtomic);
@@ -620,7 +626,7 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
return NewSI;
}
void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
@@ -741,7 +747,7 @@ bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
}
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
return lowerAtomicRMWInst(AI);
case TargetLoweringBase::AtomicExpansionKind::Expand:
case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
TLI->emitExpandAtomicRMW(AI);
return true;
default:
@@ -1695,7 +1701,7 @@ bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
return true;
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
return lowerAtomicCmpXchgInst(CI);
case TargetLoweringBase::AtomicExpansionKind::Expand: {
case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
TLI->emitExpandAtomicCmpXchg(CI);
return true;
}

View File

@@ -17808,11 +17808,19 @@ static bool flatInstrMayAccessPrivate(const Instruction *I) {
!AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS);
}
static TargetLowering::AtomicExpansionKind
getPrivateAtomicExpansionKind(const GCNSubtarget &STI) {
// For GAS, lower to flat atomic.
return STI.hasGloballyAddressableScratch()
? TargetLowering::AtomicExpansionKind::Expand
: TargetLowering::AtomicExpansionKind::NotAtomic;
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
unsigned AS = RMW->getPointerAddressSpace();
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return AtomicExpansionKind::NotAtomic;
return getPrivateAtomicExpansionKind(*getSubtarget());
// 64-bit flat atomics that dynamically reside in private memory will silently
// be dropped.
@@ -17823,7 +17831,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
if (AS == AMDGPUAS::FLAT_ADDRESS &&
DL.getTypeSizeInBits(RMW->getType()) == 64 &&
flatInstrMayAccessPrivate(RMW))
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
auto ReportUnsafeHWInst = [=](TargetLowering::AtomicExpansionKind Kind) {
OptimizationRemarkEmitter ORE(RMW->getFunction());
@@ -17898,7 +17906,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// does. InstCombine transforms these with 0 to or, so undo that.
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
ConstVal && ConstVal->isNullValue())
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
}
// If the allocation could be in remote, fine-grained memory, the rmw
@@ -18027,9 +18035,9 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// fadd.
if (Subtarget->hasLDSFPAtomicAddF32()) {
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
}
}
}
@@ -18083,14 +18091,14 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
? getPrivateAtomicExpansionKind(*getSubtarget())
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
? getPrivateAtomicExpansionKind(*getSubtarget())
: AtomicExpansionKind::None;
}
@@ -18098,7 +18106,7 @@ TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
unsigned AddrSpace = CmpX->getPointerAddressSpace();
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return AtomicExpansionKind::NotAtomic;
return getPrivateAtomicExpansionKind(*getSubtarget());
if (AddrSpace != AMDGPUAS::FLAT_ADDRESS || !flatInstrMayAccessPrivate(CmpX))
return AtomicExpansionKind::None;
@@ -18109,7 +18117,7 @@ SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
// If a 64-bit flat atomic may alias private, we need to avoid using the
// atomic in the private case.
return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::Expand
return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::CustomExpand
: AtomicExpansionKind::None;
}
@@ -18468,9 +18476,24 @@ void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Builder.CreateBr(ExitBB);
}
static void convertScratchAtomicToFlatAtomic(Instruction *I,
unsigned PtrOpIdx) {
Value *PtrOp = I->getOperand(PtrOpIdx);
assert(PtrOp->getType()->getPointerAddressSpace() ==
AMDGPUAS::PRIVATE_ADDRESS);
Type *FlatPtr = PointerType::get(I->getContext(), AMDGPUAS::FLAT_ADDRESS);
Value *ASCast = CastInst::CreatePointerCast(PtrOp, FlatPtr, "scratch.ascast",
I->getIterator());
I->setOperand(PtrOpIdx, ASCast);
}
void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
AtomicRMWInst::BinOp Op = AI->getOperation();
if (AI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return convertScratchAtomicToFlatAtomic(AI, AI->getPointerOperandIndex());
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
Op == AtomicRMWInst::Xor) {
if (const auto *ConstVal = dyn_cast<Constant>(AI->getValOperand());
@@ -18493,9 +18516,28 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
}
void SITargetLowering::emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
if (CI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return convertScratchAtomicToFlatAtomic(CI, CI->getPointerOperandIndex());
emitExpandAtomicAddrSpacePredicate(CI);
}
void SITargetLowering::emitExpandAtomicLoad(LoadInst *LI) const {
if (LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return convertScratchAtomicToFlatAtomic(LI, LI->getPointerOperandIndex());
llvm_unreachable(
"Expand Atomic Load only handles SCRATCH -> FLAT conversion");
}
void SITargetLowering::emitExpandAtomicStore(StoreInst *SI) const {
if (SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return convertScratchAtomicToFlatAtomic(SI, SI->getPointerOperandIndex());
llvm_unreachable(
"Expand Atomic Store only handles SCRATCH -> FLAT conversion");
}
LoadInst *
SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
IRBuilder<> Builder(AI);
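
On the AMDGPU side, `convertScratchAtomicToFlatAtomic` only swaps the pointer operand for an addrspacecast into the flat address space and leaves the rest of the instruction, including ordering and syncscope, untouched. A hand-written before/after sketch of the resulting IR (the new atomic-expand test at the end of this compare checks the same pattern on gfx1250):

; Before: atomic store through a scratch (addrspace(5)) pointer.
store atomic i32 %val, ptr addrspace(5) %addr release, align 4

; After emitExpandAtomicStore on a globally-addressable-scratch subtarget:
%scratch.ascast = addrspacecast ptr addrspace(5) %addr to ptr
store atomic i32 %val, ptr %scratch.ascast release, align 4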

View File

@@ -562,6 +562,8 @@ public:
void emitExpandAtomicAddrSpacePredicate(Instruction *AI) const;
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const override;
void emitExpandAtomicLoad(LoadInst *LI) const override;
void emitExpandAtomicStore(StoreInst *SI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

View File

@@ -7893,7 +7893,7 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
AI->getOperation() == AtomicRMWInst::Or ||
AI->getOperation() == AtomicRMWInst::Xor))
return AtomicExpansionKind::Expand;
return AtomicExpansionKind::CustomExpand;
if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
return AtomicExpansionKind::CmpXChg;
}

View File

@@ -86,15 +86,3 @@ entry:
store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4
ret void
}
; GCN: scratch_atomic_store:
; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
; GCN: .amdhsa_kernel scratch_atomic_store
; CU: .amdhsa_uses_cu_stores 1
; NOCU: .amdhsa_uses_cu_stores 0
define amdgpu_kernel void @scratch_atomic_store(ptr addrspace(5) %dst, i32 %val) {
entry:
store atomic i32 %val, ptr addrspace(5) %dst syncscope("wavefront") unordered, align 4
ret void
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,172 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1200 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1250 %s
define void @system_atomic_store_unordered_float(ptr addrspace(5) %addr, float %val) {
; GFX1200-LABEL: define void @system_atomic_store_unordered_float(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX1200-NEXT: store float [[VAL]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret void
;
; GFX1250-LABEL: define void @system_atomic_store_unordered_float(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: store atomic float [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
; GFX1250-NEXT: ret void
;
store atomic float %val, ptr addrspace(5) %addr unordered, align 4
ret void
}
define void @system_atomic_store_unordered_i32(ptr addrspace(5) %addr, i32 %val) {
; GFX1200-LABEL: define void @system_atomic_store_unordered_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret void
;
; GFX1250-LABEL: define void @system_atomic_store_unordered_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
; GFX1250-NEXT: ret void
;
store atomic i32 %val, ptr addrspace(5) %addr unordered, align 4
ret void
}
define void @system_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
; GFX1200-LABEL: define void @system_atomic_store_release_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret void
;
; GFX1250-LABEL: define void @system_atomic_store_release_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] release, align 4
; GFX1250-NEXT: ret void
;
store atomic i32 %val, ptr addrspace(5) %addr release, align 4
ret void
}
define void @workgroup_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
; GFX1200-LABEL: define void @workgroup_atomic_store_release_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret void
;
; GFX1250-LABEL: define void @workgroup_atomic_store_release_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] syncscope("workgroup") release, align 4
; GFX1250-NEXT: ret void
;
store atomic i32 %val, ptr addrspace(5) %addr syncscope("workgroup") release, align 4
ret void
}
define float @system_atomic_load_unordered_float(ptr addrspace(5) %addr) {
; GFX1200-LABEL: define float @system_atomic_load_unordered_float(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: [[VAL:%.*]] = load float, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret float [[VAL]]
;
; GFX1250-LABEL: define float @system_atomic_load_unordered_float(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: [[VAL:%.*]] = load atomic float, ptr [[SCRATCH_ASCAST]] unordered, align 4
; GFX1250-NEXT: ret float [[VAL]]
;
%val = load atomic float, ptr addrspace(5) %addr unordered, align 4
ret float %val
}
define i32 @system_atomic_load_unordered_i32(ptr addrspace(5) %addr) {
; GFX1200-LABEL: define i32 @system_atomic_load_unordered_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret i32 [[VAL]]
;
; GFX1250-LABEL: define i32 @system_atomic_load_unordered_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] unordered, align 4
; GFX1250-NEXT: ret i32 [[VAL]]
;
%val = load atomic i32, ptr addrspace(5) %addr unordered, align 4
ret i32 %val
}
define i32 @system_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
; GFX1200-LABEL: define i32 @system_atomic_load_acquire_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret i32 [[VAL]]
;
; GFX1250-LABEL: define i32 @system_atomic_load_acquire_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] acquire, align 4
; GFX1250-NEXT: ret i32 [[VAL]]
;
%val = load atomic i32, ptr addrspace(5) %addr acquire, align 4
ret i32 %val
}
define i32 @workgroup_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
; GFX1200-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret i32 [[VAL]]
;
; GFX1250-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] syncscope("workgroup") acquire, align 4
; GFX1250-NEXT: ret i32 [[VAL]]
;
%val = load atomic i32, ptr addrspace(5) %addr syncscope("workgroup") acquire, align 4
ret i32 %val
}
define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(ptr addrspace(5) %addr, i32 %old, i32 %in) {
; GFX1200-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[OLD]]
; GFX1200-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[IN]], i32 [[TMP1]]
; GFX1200-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
; GFX1200-NEXT: [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
; GFX1200-NEXT: [[RES:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX1200-NEXT: ret i32 [[RES]]
;
; GFX1250-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: [[VAL:%.*]] = cmpxchg volatile ptr [[SCRATCH_ASCAST]], i32 [[OLD]], i32 [[IN]] acq_rel acquire, align 4
; GFX1250-NEXT: [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
; GFX1250-NEXT: ret i32 [[RES]]
;
%val = cmpxchg volatile ptr addrspace(5) %addr, i32 %old, i32 %in acq_rel acquire
%res = extractvalue { i32, i1 } %val, 0
ret i32 %res
}
define i32 @system_atomicrmw_add_acq_rel_i32(ptr addrspace(5) %addr, i32 %in) {
; GFX1200-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
; GFX1200-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: store i32 [[IN]], ptr addrspace(5) [[ADDR]], align 4
; GFX1200-NEXT: ret i32 [[TMP1]]
;
; GFX1250-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
; GFX1250-NEXT: [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
; GFX1250-NEXT: [[VAL:%.*]] = atomicrmw volatile xchg ptr [[SCRATCH_ASCAST]], i32 [[IN]] acq_rel, align 4
; GFX1250-NEXT: ret i32 [[VAL]]
;
%val = atomicrmw volatile xchg ptr addrspace(5) %addr, i32 %in acq_rel
ret i32 %val
}