Compare commits: main...users/pier (4 commits)

Commits: 6cd7c41646, 3c6b5f75a5, 9cdf588d22, d05704bce4
llvm/include/llvm/CodeGen/TargetLowering.h

@@ -268,6 +268,7 @@ public:
     CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
                        // operations; used by X86.
     Expand,            // Generic expansion in terms of other atomic operations.
+    CustomExpand,      // Custom target-specific expansion using TLI hooks.
 
     // Rewrite to a non-atomic form for use in a known non-preemptible
     // environment.
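
The new enumerator sits alongside Expand: Expand keeps the generic IR-level lowering strategies, while CustomExpand hands the instruction to a target hook. A minimal sketch of how a backend might choose between the kinds (ExampleTargetLowering and hasWideAtomics() are hypothetical names, not part of this patch):

// Hypothetical target deciding how an atomicrmw should be expanded.
TargetLowering::AtomicExpansionKind
ExampleTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size <= 32)
    return AtomicExpansionKind::None;    // Natively supported; leave it alone.
  if (!hasWideAtomics())
    return AtomicExpansionKind::CmpXChg; // Generic cmpxchg loop.
  // Take full control: the AtomicExpand pass will call emitExpandAtomicRMW().
  return AtomicExpansionKind::CustomExpand;
}
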
@@ -2273,6 +2274,18 @@ public:
         "Generic atomicrmw expansion unimplemented on this target");
   }
 
+  /// Perform an atomic store using a target-specific way.
+  virtual void emitExpandAtomicStore(StoreInst *SI) const {
+    llvm_unreachable(
+        "Generic atomic store expansion unimplemented on this target");
+  }
+
+  /// Perform an atomic load using a target-specific way.
+  virtual void emitExpandAtomicLoad(LoadInst *LI) const {
+    llvm_unreachable(
+        "Generic atomic load expansion unimplemented on this target");
+  }
+
   /// Perform a cmpxchg expansion using a target-specific method.
   virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
     llvm_unreachable("Generic cmpxchg expansion unimplemented on this target");
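
A target that returns CustomExpand for a load or store is expected to override the matching hook, otherwise the default above aborts. A minimal sketch that rewrites the pointer operand, mirroring the AMDGPU implementation further down (ExampleTargetLowering and the address-space choice are assumptions):

// Hypothetical override paired with a CustomExpand return value. The pass
// hands over the original StoreInst; the target rewrites it in place.
void ExampleTargetLowering::emitExpandAtomicStore(StoreInst *SI) const {
  // Rewrite the store to go through a generic (address space 0) pointer,
  // assuming this target keeps such accesses atomic.
  Value *Ptr = SI->getPointerOperand();
  Type *FlatPtrTy = PointerType::get(SI->getContext(), /*AddressSpace=*/0);
  Value *Cast = CastInst::CreatePointerCast(Ptr, FlatPtrTy, "flat.cast",
                                            SI->getIterator());
  SI->setOperand(SI->getPointerOperandIndex(), Cast);
}
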
@@ -2377,8 +2390,8 @@ public:
   }
 
   /// Returns how the given (atomic) store should be expanded by the IR-level
-  /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try
-  /// to use an atomicrmw xchg.
+  /// AtomicExpand pass into. For instance AtomicExpansionKind::CustomExpand
+  /// will try to use an atomicrmw xchg.
   virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
     return AtomicExpansionKind::None;
   }
llvm/lib/CodeGen/AtomicExpandPass.cpp

@@ -84,7 +84,7 @@ private:
   bool expandAtomicLoadToCmpXchg(LoadInst *LI);
   StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
   bool tryExpandAtomicStore(StoreInst *SI);
-  void expandAtomicStore(StoreInst *SI);
+  void expandAtomicStoreToXChg(StoreInst *SI);
   bool tryExpandAtomicRMW(AtomicRMWInst *AI);
   AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
   Value *
@@ -537,6 +537,9 @@ bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
     LI->setAtomic(AtomicOrdering::NotAtomic);
     return true;
+  case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
+    TLI->emitExpandAtomicLoad(LI);
+    return true;
   default:
     llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
   }
@@ -546,8 +549,11 @@ bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
   switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
+  case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
+    TLI->emitExpandAtomicStore(SI);
+    return true;
   case TargetLoweringBase::AtomicExpansionKind::Expand:
-    expandAtomicStore(SI);
+    expandAtomicStoreToXChg(SI);
     return true;
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
     SI->setAtomic(AtomicOrdering::NotAtomic);
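
Summarizing the store path after this change (a condensed restatement of the switch above, not literal pass code):

// shouldExpandAtomicStoreInIR(SI) -> action taken by AtomicExpand:
//   None         -> leave the store untouched
//   CustomExpand -> TLI->emitExpandAtomicStore(SI)   (new in this patch)
//   Expand       -> expandAtomicStoreToXChg(SI)      (renamed helper)
//   NotAtomic    -> SI->setAtomic(AtomicOrdering::NotAtomic)
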
@@ -620,7 +626,7 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
   return NewSI;
 }
 
-void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
+void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
   // This function is only called on atomic stores that are too large to be
   // atomic if implemented as a native store. So we replace them by an
   // atomic swap, that can be implemented for example as a ldrex/strex on ARM
@@ -741,7 +747,7 @@ bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
   }
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
     return lowerAtomicRMWInst(AI);
-  case TargetLoweringBase::AtomicExpansionKind::Expand:
+  case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
     TLI->emitExpandAtomicRMW(AI);
     return true;
   default:
@@ -1695,7 +1701,7 @@ bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
     return true;
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
     return lowerAtomicCmpXchgInst(CI);
-  case TargetLoweringBase::AtomicExpansionKind::Expand: {
+  case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
     TLI->emitExpandAtomicCmpXchg(CI);
     return true;
   }
llvm/lib/Target/AMDGPU/SIISelLowering.cpp

@@ -17808,11 +17808,19 @@ static bool flatInstrMayAccessPrivate(const Instruction *I) {
          !AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS);
 }
 
+static TargetLowering::AtomicExpansionKind
+getPrivateAtomicExpansionKind(const GCNSubtarget &STI) {
+  // For GAS, lower to flat atomic.
+  return STI.hasGloballyAddressableScratch()
+             ? TargetLowering::AtomicExpansionKind::Expand
+             : TargetLowering::AtomicExpansionKind::NotAtomic;
+}
+
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   unsigned AS = RMW->getPointerAddressSpace();
   if (AS == AMDGPUAS::PRIVATE_ADDRESS)
-    return AtomicExpansionKind::NotAtomic;
+    return getPrivateAtomicExpansionKind(*getSubtarget());
 
   // 64-bit flat atomics that dynamically reside in private memory will silently
   // be dropped.
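
The helper centralizes the policy for atomics on private (scratch) pointers. Summarizing the two outcomes, with the rationale as I read it from the surrounding comments and tests:

// getPrivateAtomicExpansionKind(STI), summarized:
//   STI.hasGloballyAddressableScratch() -> Expand: keep the operation atomic
//       and lower the scratch access to a flat one (see the GFX1250 tests).
//   otherwise -> NotAtomic: scratch is per-lane memory, so the atomic
//       ordering can simply be dropped.
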
@@ -17823,7 +17831,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   if (AS == AMDGPUAS::FLAT_ADDRESS &&
       DL.getTypeSizeInBits(RMW->getType()) == 64 &&
       flatInstrMayAccessPrivate(RMW))
-    return AtomicExpansionKind::Expand;
+    return AtomicExpansionKind::CustomExpand;
 
   auto ReportUnsafeHWInst = [=](TargetLowering::AtomicExpansionKind Kind) {
     OptimizationRemarkEmitter ORE(RMW->getFunction());
@@ -17898,7 +17906,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
     // does. InstCombine transforms these with 0 to or, so undo that.
     if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
         ConstVal && ConstVal->isNullValue())
-      return AtomicExpansionKind::Expand;
+      return AtomicExpansionKind::CustomExpand;
   }
 
   // If the allocation could be in remote, fine-grained memory, the rmw
@@ -18027,9 +18035,9 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
       // fadd.
       if (Subtarget->hasLDSFPAtomicAddF32()) {
         if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
-          return AtomicExpansionKind::Expand;
+          return AtomicExpansionKind::CustomExpand;
         if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
-          return AtomicExpansionKind::Expand;
+          return AtomicExpansionKind::CustomExpand;
       }
     }
   }
@@ -18083,14 +18091,14 @@
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
-             ? AtomicExpansionKind::NotAtomic
+             ? getPrivateAtomicExpansionKind(*getSubtarget())
              : AtomicExpansionKind::None;
 }
 
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
-             ? AtomicExpansionKind::NotAtomic
+             ? getPrivateAtomicExpansionKind(*getSubtarget())
              : AtomicExpansionKind::None;
 }
 
@@ -18098,7 +18106,7 @@
 SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
   unsigned AddrSpace = CmpX->getPointerAddressSpace();
   if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
-    return AtomicExpansionKind::NotAtomic;
+    return getPrivateAtomicExpansionKind(*getSubtarget());
 
   if (AddrSpace != AMDGPUAS::FLAT_ADDRESS || !flatInstrMayAccessPrivate(CmpX))
     return AtomicExpansionKind::None;
@@ -18109,7 +18117,7 @@ SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
 
   // If a 64-bit flat atomic may alias private, we need to avoid using the
   // atomic in the private case.
-  return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::Expand
+  return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::CustomExpand
                                            : AtomicExpansionKind::None;
 }
 
@@ -18468,9 +18476,24 @@ void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
   Builder.CreateBr(ExitBB);
 }
 
+static void convertScratchAtomicToFlatAtomic(Instruction *I,
+                                             unsigned PtrOpIdx) {
+  Value *PtrOp = I->getOperand(PtrOpIdx);
+  assert(PtrOp->getType()->getPointerAddressSpace() ==
+         AMDGPUAS::PRIVATE_ADDRESS);
+
+  Type *FlatPtr = PointerType::get(I->getContext(), AMDGPUAS::FLAT_ADDRESS);
+  Value *ASCast = CastInst::CreatePointerCast(PtrOp, FlatPtr, "scratch.ascast",
+                                              I->getIterator());
+  I->setOperand(PtrOpIdx, ASCast);
+}
+
 void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
   AtomicRMWInst::BinOp Op = AI->getOperation();
 
+  if (AI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(AI, AI->getPointerOperandIndex());
+
   if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
       Op == AtomicRMWInst::Xor) {
     if (const auto *ConstVal = dyn_cast<Constant>(AI->getValOperand());
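
The helper's effect on the IR, taking an atomicrmw as an example; the autogenerated GFX1250 checks at the end of this diff show the same pattern:

// Before (pointer in the scratch address space):
//   %val = atomicrmw volatile xchg ptr addrspace(5) %addr, i32 %in acq_rel
// After convertScratchAtomicToFlatAtomic(AI, AI->getPointerOperandIndex()):
//   %scratch.ascast = addrspacecast ptr addrspace(5) %addr to ptr
//   %val = atomicrmw volatile xchg ptr %scratch.ascast, i32 %in acq_rel
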
@@ -18493,9 +18516,28 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
 }
 
 void SITargetLowering::emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
+  if (CI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(CI, CI->getPointerOperandIndex());
+
   emitExpandAtomicAddrSpacePredicate(CI);
 }
 
+void SITargetLowering::emitExpandAtomicLoad(LoadInst *LI) const {
+  if (LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(LI, LI->getPointerOperandIndex());
+
+  llvm_unreachable(
+      "Expand Atomic Load only handles SCRATCH -> FLAT conversion");
+}
+
+void SITargetLowering::emitExpandAtomicStore(StoreInst *SI) const {
+  if (SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
+    return convertScratchAtomicToFlatAtomic(SI, SI->getPointerOperandIndex());
+
+  llvm_unreachable(
+      "Expand Atomic Store only handles SCRATCH -> FLAT conversion");
+}
+
 LoadInst *
 SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   IRBuilder<> Builder(AI);
llvm/lib/Target/AMDGPU/SIISelLowering.h

@@ -562,6 +562,8 @@ public:
   void emitExpandAtomicAddrSpacePredicate(Instruction *AI) const;
   void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
   void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const override;
+  void emitExpandAtomicLoad(LoadInst *LI) const override;
+  void emitExpandAtomicStore(StoreInst *SI) const override;
 
   LoadInst *
   lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

@@ -7893,7 +7893,7 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
     if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
                       AI->getOperation() == AtomicRMWInst::Or ||
                       AI->getOperation() == AtomicRMWInst::Xor))
-      return AtomicExpansionKind::Expand;
+      return AtomicExpansionKind::CustomExpand;
     if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
       return AtomicExpansionKind::CmpXChg;
   }
@@ -86,15 +86,3 @@ entry:
   store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4
   ret void
 }
-
-; GCN: scratch_atomic_store:
-; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
-; GCN: .amdhsa_kernel scratch_atomic_store
-; CU: .amdhsa_uses_cu_stores 1
-; NOCU: .amdhsa_uses_cu_stores 0
-define amdgpu_kernel void @scratch_atomic_store(ptr addrspace(5) %dst, i32 %val) {
-entry:
-  store atomic i32 %val, ptr addrspace(5) %dst syncscope("wavefront") unordered, align 4
-  ret void
-}
New test files (diffs suppressed because they are too large):

llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll (new file, 23707 lines)
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll (new file, 23605 lines)
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll (new file, 22648 lines)
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll (new file, 23605 lines)
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll (new file, 23651 lines)
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1250 %s
+
+define void @system_atomic_store_unordered_float(ptr addrspace(5) %addr, float %val) {
+; GFX1200-LABEL: define void @system_atomic_store_unordered_float(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX1200-NEXT:    store float [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_unordered_float(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic float [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic float %val, ptr addrspace(5) %addr unordered, align 4
+  ret void
+}
+
+define void @system_atomic_store_unordered_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @system_atomic_store_unordered_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_unordered_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr unordered, align 4
+  ret void
+}
+
+define void @system_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @system_atomic_store_release_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_release_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] release, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr release, align 4
+  ret void
+}
+
+define void @workgroup_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @workgroup_atomic_store_release_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @workgroup_atomic_store_release_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] syncscope("workgroup") release, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr syncscope("workgroup") release, align 4
+  ret void
+}
+
+define float @system_atomic_load_unordered_float(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define float @system_atomic_load_unordered_float(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load float, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret float [[VAL]]
+;
+; GFX1250-LABEL: define float @system_atomic_load_unordered_float(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic float, ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret float [[VAL]]
+;
+  %val = load atomic float, ptr addrspace(5) %addr unordered, align 4
+  ret float %val
+}
+
+define i32 @system_atomic_load_unordered_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @system_atomic_load_unordered_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_load_unordered_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr unordered, align 4
+  ret i32 %val
+}
+
+define i32 @system_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @system_atomic_load_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_load_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] acquire, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr acquire, align 4
+  ret i32 %val
+}
+
+define i32 @workgroup_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] syncscope("workgroup") acquire, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr syncscope("workgroup") acquire, align 4
+  ret i32 %val
+}
+
+define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(ptr addrspace(5) %addr, i32 %old, i32 %in) {
+; GFX1200-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[OLD]]
+; GFX1200-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[IN]], i32 [[TMP1]]
+; GFX1200-NEXT:    store i32 [[TMP3]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
+; GFX1200-NEXT:    [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
+; GFX1200-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; GFX1200-NEXT:    ret i32 [[RES]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = cmpxchg volatile ptr [[SCRATCH_ASCAST]], i32 [[OLD]], i32 [[IN]] acq_rel acquire, align 4
+; GFX1250-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
+; GFX1250-NEXT:    ret i32 [[RES]]
+;
+  %val = cmpxchg volatile ptr addrspace(5) %addr, i32 %old, i32 %in acq_rel acquire
+  %res = extractvalue { i32, i1 } %val, 0
+  ret i32 %res
+}
+
+define i32 @system_atomicrmw_add_acq_rel_i32(ptr addrspace(5) %addr, i32 %in) {
+; GFX1200-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    store i32 [[IN]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[TMP1]]
+;
+; GFX1250-LABEL: define i32 @system_atomicrmw_add_acq_rel_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = atomicrmw volatile xchg ptr [[SCRATCH_ASCAST]], i32 [[IN]] acq_rel, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = atomicrmw volatile xchg ptr addrspace(5) %addr, i32 %in acq_rel
+  ret i32 %val
+}