AMDGPU: Expand remaining system atomic operations (#122137)

System scope atomics need to use cmpxchg loops if we know
nothing about the allocation the address is from.
aea5980e26e6a87dab9f8acb10eb3a59dd143cb1 started this, this
expands the set to cover the remaining integer operations.

Don't expand xchg and add, those theoretically should work over PCIe.
This is a pre-commit which will introduce performance regressions.
Subsequent changes will add handling of new atomicrmw metadata, which
will avoid the expansion.

Note this still isn't conservative enough; we do need to expand
some device scope atomics if the memory is in fine-grained remote
memory.
This commit is contained in:
Matt Arsenault 2025-08-22 13:55:04 +09:00 committed by GitHub
parent 6a7ade03d1
commit 01f785cac4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 22152 additions and 4519 deletions

View File

@ -17840,28 +17840,41 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
auto Op = RMW->getOperation();
switch (Op) {
case AtomicRMWInst::Xchg: {
case AtomicRMWInst::Xchg:
// PCIe supports add and xchg for system atomics.
return isAtomicRMWLegalXChgTy(RMW)
? TargetLowering::AtomicExpansionKind::None
: TargetLowering::AtomicExpansionKind::CmpXChg;
}
case AtomicRMWInst::Add:
case AtomicRMWInst::And:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
// PCIe supports add and xchg for system atomics.
return atomicSupportedIfLegalIntType(RMW);
case AtomicRMWInst::Sub:
case AtomicRMWInst::And:
case AtomicRMWInst::Or:
case AtomicRMWInst::Xor: {
case AtomicRMWInst::Xor:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap: {
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
// Always expand system scope atomics.
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics()) {
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
Op == AtomicRMWInst::Xor) {
// Atomic sub/or/xor do not work over PCI express, but atomic add
// does. InstCombine transforms these with 0 to or, so undo that.
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
ConstVal && ConstVal->isNullValue())
return AtomicExpansionKind::Expand;
}
return AtomicExpansionKind::CmpXChg;
}
}
return atomicSupportedIfLegalIntType(RMW);
}
case AtomicRMWInst::FAdd: {
@ -18014,18 +18027,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AtomicExpansionKind::CmpXChg;
}
case AtomicRMWInst::Min:
case AtomicRMWInst::Max:
case AtomicRMWInst::UMin:
case AtomicRMWInst::UMax: {
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics())
return AtomicExpansionKind::CmpXChg;
}
return atomicSupportedIfLegalIntType(RMW);
}
case AtomicRMWInst::Nand:
case AtomicRMWInst::FSub:
default:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -151,8 +151,17 @@ define i16 @test_atomicrmw_and_i16_global_system(ptr addrspace(1) %ptr, i16 %val
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[ANDOPERAND]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
@ -204,8 +213,17 @@ define i16 @test_atomicrmw_or_i16_global_system(ptr addrspace(1) %ptr, i16 %valu
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
@ -224,8 +242,17 @@ define i16 @test_atomicrmw_xor_i16_global_system(ptr addrspace(1) %ptr, i16 %val
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;

View File

@ -112,7 +112,16 @@ define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory_
define i32 @test_atomicrmw_sub_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst
@ -122,7 +131,16 @@ define i32 @test_atomicrmw_sub_i32_global_system(ptr addrspace(1) %ptr, i32 %val
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -132,7 +150,16 @@ define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
@ -142,7 +169,16 @@ define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(ptr ad
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -157,7 +193,16 @@ define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory_
define i32 @test_atomicrmw_and_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst
@ -167,7 +212,16 @@ define i32 @test_atomicrmw_and_i32_global_system(ptr addrspace(1) %ptr, i32 %val
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -177,7 +231,16 @@ define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
@ -187,7 +250,16 @@ define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(ptr ad
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -287,7 +359,16 @@ define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory
define i32 @test_atomicrmw_or_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst
@ -297,7 +378,16 @@ define i32 @test_atomicrmw_or_i32_global_system(ptr addrspace(1) %ptr, i32 %valu
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -307,7 +397,16 @@ define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(p
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
@ -317,7 +416,16 @@ define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(ptr add
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -332,7 +440,16 @@ define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__
define i32 @test_atomicrmw_xor_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst
@ -342,7 +459,16 @@ define i32 @test_atomicrmw_xor_i32_global_system(ptr addrspace(1) %ptr, i32 %val
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -352,7 +478,16 @@ define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
@ -362,7 +497,16 @@ define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(ptr ad
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -717,7 +861,18 @@ define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory
define i32 @test_atomicrmw_uinc_wrap_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst
@ -727,7 +882,18 @@ define i32 @test_atomicrmw_uinc_wrap_i32_global_system(ptr addrspace(1) %ptr, i3
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -737,7 +903,18 @@ define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_m
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
@ -747,7 +924,18 @@ define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -762,7 +950,20 @@ define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_m
define i32 @test_atomicrmw_udec_wrap_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst
@ -772,7 +973,20 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_system(ptr addrspace(1) %ptr, i3
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -782,7 +996,20 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_m
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
@ -792,7 +1019,20 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0

View File

@ -112,7 +112,16 @@ define i64 @test_atomicrmw_add_i64_global_system__amdgpu_no_fine_grained_memory_
define i64 @test_atomicrmw_sub_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst
@ -122,7 +131,16 @@ define i64 @test_atomicrmw_sub_i64_global_system(ptr addrspace(1) %ptr, i64 %val
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -132,7 +150,16 @@ define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory(
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
@ -142,7 +169,16 @@ define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_remote_memory(ptr ad
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -157,7 +193,16 @@ define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory_
define i64 @test_atomicrmw_and_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst
@ -167,7 +212,16 @@ define i64 @test_atomicrmw_and_i64_global_system(ptr addrspace(1) %ptr, i64 %val
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -177,7 +231,16 @@ define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory(
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
@ -187,7 +250,16 @@ define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_remote_memory(ptr ad
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -287,7 +359,16 @@ define i64 @test_atomicrmw_nand_i64_global_system__amdgpu_no_fine_grained_memory
define i64 @test_atomicrmw_or_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst
@ -297,7 +378,16 @@ define i64 @test_atomicrmw_or_i64_global_system(ptr addrspace(1) %ptr, i64 %valu
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -307,7 +397,16 @@ define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory(p
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
@ -317,7 +416,16 @@ define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_remote_memory(ptr add
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -332,7 +440,16 @@ define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory__
define i64 @test_atomicrmw_xor_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst
@ -342,7 +459,16 @@ define i64 @test_atomicrmw_xor_i64_global_system(ptr addrspace(1) %ptr, i64 %val
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -352,7 +478,16 @@ define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory(
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
@ -362,7 +497,16 @@ define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_remote_memory(ptr ad
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -717,7 +861,18 @@ define i64 @test_atomicrmw_umin_i64_global_system__amdgpu_no_fine_grained_memory
define i64 @test_atomicrmw_uinc_wrap_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst
@ -727,7 +882,18 @@ define i64 @test_atomicrmw_uinc_wrap_i64_global_system(ptr addrspace(1) %ptr, i6
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -737,7 +903,18 @@ define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_m
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
@ -747,7 +924,18 @@ define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_remote_memory(
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@ -762,7 +950,20 @@ define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_m
define i64 @test_atomicrmw_udec_wrap_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst
@ -772,7 +973,20 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_system(ptr addrspace(1) %ptr, i6
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
@ -782,7 +996,20 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_m
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
@ -792,7 +1019,20 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_remote_memory(
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0

View File

@ -162,8 +162,17 @@ define i8 @test_atomicrmw_and_i8_global_system(ptr addrspace(1) %ptr, i8 %value)
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[ANDOPERAND]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
; CHECK-NEXT: ret i8 [[EXTRACTED]]
;
@ -215,8 +224,17 @@ define i8 @test_atomicrmw_or_i8_global_system(ptr addrspace(1) %ptr, i8 %value)
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
; CHECK-NEXT: ret i8 [[EXTRACTED]]
;
@ -235,8 +253,17 @@ define i8 @test_atomicrmw_xor_i8_global_system(ptr addrspace(1) %ptr, i8 %value)
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
; CHECK-NEXT: ret i8 [[EXTRACTED]]
;

View File

@ -93,7 +93,16 @@ define i32 @test_atomicrmw_or_0_local(ptr addrspace(3) %ptr) {
define i32 @test_atomicrmw_or_1_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_1_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 1 seq_cst, align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], 1
; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; CHECK-NEXT: [[RES]] = extractvalue { i32, i1 } [[TMP2]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 1 seq_cst
@ -103,7 +112,16 @@ define i32 @test_atomicrmw_or_1_global_system(ptr addrspace(1) %ptr) {
define i32 @test_atomicrmw_or_var_global_system(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_var_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VAL]] seq_cst, align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VAL]]
; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
; CHECK-NEXT: [[RES]] = extractvalue { i32, i1 } [[TMP2]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst