AMDGPU: Expand remaining system atomic operations (#122137)
System scope atomics need to use cmpxchg loops if we know nothing about the allocation the address is from. aea5980e26e6a87dab9f8acb10eb3a59dd143cb1 started this; this commit expands the set to cover the remaining integer operations. Don't expand xchg and add, since those should theoretically work over PCIe. This is a pre-commit which will introduce performance regressions. Subsequent changes will add handling of new atomicrmw metadata, which will avoid the expansion. Note this still isn't conservative enough; we do need to expand some device scope atomics if the memory is in fine-grained remote memory.
This commit is contained in:
parent
6a7ade03d1
commit
01f785cac4
@ -17840,28 +17840,41 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
|
||||
|
||||
auto Op = RMW->getOperation();
|
||||
switch (Op) {
|
||||
case AtomicRMWInst::Xchg: {
|
||||
case AtomicRMWInst::Xchg:
|
||||
// PCIe supports add and xchg for system atomics.
|
||||
return isAtomicRMWLegalXChgTy(RMW)
|
||||
? TargetLowering::AtomicExpansionKind::None
|
||||
: TargetLowering::AtomicExpansionKind::CmpXChg;
|
||||
}
|
||||
case AtomicRMWInst::Add:
|
||||
case AtomicRMWInst::And:
|
||||
case AtomicRMWInst::UIncWrap:
|
||||
case AtomicRMWInst::UDecWrap:
|
||||
// PCIe supports add and xchg for system atomics.
|
||||
return atomicSupportedIfLegalIntType(RMW);
|
||||
case AtomicRMWInst::Sub:
|
||||
case AtomicRMWInst::And:
|
||||
case AtomicRMWInst::Or:
|
||||
case AtomicRMWInst::Xor: {
|
||||
case AtomicRMWInst::Xor:
|
||||
case AtomicRMWInst::Max:
|
||||
case AtomicRMWInst::Min:
|
||||
case AtomicRMWInst::UMax:
|
||||
case AtomicRMWInst::UMin:
|
||||
case AtomicRMWInst::UIncWrap:
|
||||
case AtomicRMWInst::UDecWrap: {
|
||||
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
|
||||
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
|
||||
// Always expand system scope atomics.
|
||||
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics()) {
|
||||
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
|
||||
Op == AtomicRMWInst::Xor) {
|
||||
// Atomic sub/or/xor do not work over PCI express, but atomic add
|
||||
// does. InstCombine transforms these with 0 to or, so undo that.
|
||||
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
|
||||
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
|
||||
ConstVal && ConstVal->isNullValue())
|
||||
return AtomicExpansionKind::Expand;
|
||||
}
|
||||
|
||||
return AtomicExpansionKind::CmpXChg;
|
||||
}
|
||||
}
|
||||
|
||||
return atomicSupportedIfLegalIntType(RMW);
|
||||
}
|
||||
case AtomicRMWInst::FAdd: {
|
||||
@ -18014,18 +18027,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
|
||||
|
||||
return AtomicExpansionKind::CmpXChg;
|
||||
}
|
||||
case AtomicRMWInst::Min:
|
||||
case AtomicRMWInst::Max:
|
||||
case AtomicRMWInst::UMin:
|
||||
case AtomicRMWInst::UMax: {
|
||||
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
|
||||
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
|
||||
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics())
|
||||
return AtomicExpansionKind::CmpXChg;
|
||||
}
|
||||
|
||||
return atomicSupportedIfLegalIntType(RMW);
|
||||
}
|
||||
case AtomicRMWInst::Nand:
|
||||
case AtomicRMWInst::FSub:
|
||||
default:
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -151,8 +151,17 @@ define i16 @test_atomicrmw_and_i16_global_system(ptr addrspace(1) %ptr, i16 %val
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
|
||||
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] seq_cst, align 4
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
|
||||
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CHECK: atomicrmw.start:
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CHECK-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[ANDOPERAND]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CHECK: atomicrmw.end:
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
|
||||
; CHECK-NEXT: ret i16 [[EXTRACTED]]
|
||||
;
|
||||
@ -204,8 +213,17 @@ define i16 @test_atomicrmw_or_i16_global_system(ptr addrspace(1) %ptr, i16 %valu
|
||||
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
|
||||
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
|
||||
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CHECK: atomicrmw.start:
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CHECK: atomicrmw.end:
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
|
||||
; CHECK-NEXT: ret i16 [[EXTRACTED]]
|
||||
;
|
||||
@ -224,8 +242,17 @@ define i16 @test_atomicrmw_xor_i16_global_system(ptr addrspace(1) %ptr, i16 %val
|
||||
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
|
||||
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
|
||||
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CHECK: atomicrmw.start:
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CHECK: atomicrmw.end:
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
|
||||
; CHECK-NEXT: ret i16 [[EXTRACTED]]
|
||||
;
|
||||
|
@ -112,7 +112,16 @@ define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory_
|
||||
define i32 @test_atomicrmw_sub_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst
|
||||
@ -122,7 +131,16 @@ define i32 @test_atomicrmw_sub_i32_global_system(ptr addrspace(1) %ptr, i32 %val
|
||||
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -132,7 +150,16 @@ define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -142,7 +169,16 @@ define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(ptr ad
|
||||
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -157,7 +193,16 @@ define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory_
|
||||
define i32 @test_atomicrmw_and_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst
|
||||
@ -167,7 +212,16 @@ define i32 @test_atomicrmw_and_i32_global_system(ptr addrspace(1) %ptr, i32 %val
|
||||
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -177,7 +231,16 @@ define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -187,7 +250,16 @@ define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(ptr ad
|
||||
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -287,7 +359,16 @@ define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory
|
||||
define i32 @test_atomicrmw_or_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst
|
||||
@ -297,7 +378,16 @@ define i32 @test_atomicrmw_or_i32_global_system(ptr addrspace(1) %ptr, i32 %valu
|
||||
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -307,7 +397,16 @@ define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(p
|
||||
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -317,7 +416,16 @@ define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(ptr add
|
||||
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -332,7 +440,16 @@ define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__
|
||||
define i32 @test_atomicrmw_xor_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst
|
||||
@ -342,7 +459,16 @@ define i32 @test_atomicrmw_xor_i32_global_system(ptr addrspace(1) %ptr, i32 %val
|
||||
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -352,7 +478,16 @@ define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -362,7 +497,16 @@ define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(ptr ad
|
||||
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -717,7 +861,18 @@ define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory
|
||||
define i32 @test_atomicrmw_uinc_wrap_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst
|
||||
@ -727,7 +882,18 @@ define i32 @test_atomicrmw_uinc_wrap_i32_global_system(ptr addrspace(1) %ptr, i3
|
||||
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -737,7 +903,18 @@ define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_m
|
||||
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -747,7 +924,18 @@ define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(
|
||||
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -762,7 +950,20 @@ define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_m
|
||||
define i32 @test_atomicrmw_udec_wrap_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst
|
||||
@ -772,7 +973,20 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_system(ptr addrspace(1) %ptr, i3
|
||||
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -782,7 +996,20 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_m
|
||||
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -792,7 +1019,20 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(
|
||||
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
||||
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
|
@ -112,7 +112,16 @@ define i64 @test_atomicrmw_add_i64_global_system__amdgpu_no_fine_grained_memory_
|
||||
define i64 @test_atomicrmw_sub_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst
|
||||
@ -122,7 +131,16 @@ define i64 @test_atomicrmw_sub_i64_global_system(ptr addrspace(1) %ptr, i64 %val
|
||||
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -132,7 +150,16 @@ define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -142,7 +169,16 @@ define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_remote_memory(ptr ad
|
||||
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -157,7 +193,16 @@ define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory_
|
||||
define i64 @test_atomicrmw_and_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst
|
||||
@ -167,7 +212,16 @@ define i64 @test_atomicrmw_and_i64_global_system(ptr addrspace(1) %ptr, i64 %val
|
||||
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -177,7 +231,16 @@ define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -187,7 +250,16 @@ define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_remote_memory(ptr ad
|
||||
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -287,7 +359,16 @@ define i64 @test_atomicrmw_nand_i64_global_system__amdgpu_no_fine_grained_memory
|
||||
define i64 @test_atomicrmw_or_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst
|
||||
@ -297,7 +378,16 @@ define i64 @test_atomicrmw_or_i64_global_system(ptr addrspace(1) %ptr, i64 %valu
|
||||
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -307,7 +397,16 @@ define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory(p
|
||||
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -317,7 +416,16 @@ define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_remote_memory(ptr add
|
||||
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -332,7 +440,16 @@ define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory__
|
||||
define i64 @test_atomicrmw_xor_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst
|
||||
@ -342,7 +459,16 @@ define i64 @test_atomicrmw_xor_i64_global_system(ptr addrspace(1) %ptr, i64 %val
|
||||
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -352,7 +478,16 @@ define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -362,7 +497,16 @@ define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_remote_memory(ptr ad
|
||||
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -717,7 +861,18 @@ define i64 @test_atomicrmw_umin_i64_global_system__amdgpu_no_fine_grained_memory
|
||||
define i64 @test_atomicrmw_uinc_wrap_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst
|
||||
@ -727,7 +882,18 @@ define i64 @test_atomicrmw_uinc_wrap_i64_global_system(ptr addrspace(1) %ptr, i6
|
||||
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -737,7 +903,18 @@ define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_m
|
||||
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -747,7 +924,18 @@ define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_remote_memory(
|
||||
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
@ -762,7 +950,20 @@ define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_m
|
||||
define i64 @test_atomicrmw_udec_wrap_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst
|
||||
@ -772,7 +973,20 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_system(ptr addrspace(1) %ptr, i6
|
||||
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
@ -782,7 +996,20 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_m
|
||||
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
||||
@ -792,7 +1019,20 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_remote_memory(
|
||||
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
||||
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
||||
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
||||
; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
||||
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
||||
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; COMMON: atomicrmw.start:
|
||||
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
||||
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
||||
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
|
||||
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
|
||||
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; COMMON: atomicrmw.end:
|
||||
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
||||
;
|
||||
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
||||
|
@ -162,8 +162,17 @@ define i8 @test_atomicrmw_and_i8_global_system(ptr addrspace(1) %ptr, i8 %value)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
|
||||
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] seq_cst, align 4
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
|
||||
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CHECK: atomicrmw.start:
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CHECK-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[ANDOPERAND]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CHECK: atomicrmw.end:
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
|
||||
; CHECK-NEXT: ret i8 [[EXTRACTED]]
|
||||
;
|
||||
@ -215,8 +224,17 @@ define i8 @test_atomicrmw_or_i8_global_system(ptr addrspace(1) %ptr, i8 %value)
|
||||
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
|
||||
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
|
||||
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CHECK: atomicrmw.start:
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CHECK: atomicrmw.end:
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
|
||||
; CHECK-NEXT: ret i8 [[EXTRACTED]]
|
||||
;
|
||||
@ -235,8 +253,17 @@ define i8 @test_atomicrmw_xor_i8_global_system(ptr addrspace(1) %ptr, i8 %value)
|
||||
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
|
||||
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
|
||||
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CHECK: atomicrmw.start:
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
|
||||
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
|
||||
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CHECK: atomicrmw.end:
|
||||
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
|
||||
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
|
||||
; CHECK-NEXT: ret i8 [[EXTRACTED]]
|
||||
;
|
||||
|
@ -93,7 +93,16 @@ define i32 @test_atomicrmw_or_0_local(ptr addrspace(3) %ptr) {
|
||||
define i32 @test_atomicrmw_or_1_global_system(ptr addrspace(1) %ptr) {
|
||||
; CHECK-LABEL: define i32 @test_atomicrmw_or_1_global_system(
|
||||
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 1 seq_cst, align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CHECK: atomicrmw.start:
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; CHECK-NEXT: [[RES]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CHECK: atomicrmw.end:
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i32 1 seq_cst
|
||||
@ -103,7 +112,16 @@ define i32 @test_atomicrmw_or_1_global_system(ptr addrspace(1) %ptr) {
|
||||
define i32 @test_atomicrmw_or_var_global_system(ptr addrspace(1) %ptr, i32 %val) {
|
||||
; CHECK-LABEL: define i32 @test_atomicrmw_or_var_global_system(
|
||||
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VAL]] seq_cst, align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
||||
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||
; CHECK: atomicrmw.start:
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
|
||||
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VAL]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
||||
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
; CHECK-NEXT: [[RES]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||
; CHECK: atomicrmw.end:
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst
|
||||
|
Loading…
x
Reference in New Issue
Block a user