
System scope atomics need to use cmpxchg loops if we know nothing about the allocation the address is from. aea5980e26e6a87dab9f8acb10eb3a59dd143cb1 started this; this change expands the set to cover the remaining integer operations. Don't expand xchg and add; those theoretically should work over PCIe. This is a pre-commit which will introduce performance regressions. Subsequent changes will add handling of new atomicrmw metadata, which will avoid the expansion. Note this still isn't conservative enough; we do need to expand some device scope atomics if the memory is in fine-grained remote memory.
1069 lines
64 KiB
LLVM
1069 lines
64 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX803 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw xchg
|
|
;---------------------------------------------------------------------
|
|
|
|
; xchg is supported over PCIe, so no expansion is necessary: the atomicrmw is
; expected to pass through atomic-expand unchanged.
define i64 @test_atomicrmw_xchg_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xchg_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; COMMON-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
; COMMON-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, i64 %value seq_cst
  ret i64 %res
}
|
|
|
|
; xchg is supported over PCIe, so no expansion is necessary. Metadata should be
; ignored by the expansion decision and kept on the atomicrmw.
define i64 @test_atomicrmw_xchg_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xchg_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
; COMMON-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i64 %res
}
|
|
|
|
; xchg is supported over PCIe, so no expansion is necessary. Metadata should be
; ignored by the expansion decision and kept on the atomicrmw.
define i64 @test_atomicrmw_xchg_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xchg_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; xchg is supported over PCIe, so no expansion is necessary. Metadata should be
; ignored by the expansion decision; both kinds stay on the atomicrmw.
define i64 @test_atomicrmw_xchg_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xchg_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw add
|
|
;---------------------------------------------------------------------
|
|
|
|
; add is supported over PCIe, so no expansion is necessary: the atomicrmw is
; expected to pass through atomic-expand unchanged.
define i64 @test_atomicrmw_add_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_add_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8
; COMMON-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw add ptr addrspace(1) %ptr, i64 %value seq_cst
  ret i64 %res
}
|
|
|
|
; add is supported over PCIe, so no expansion is necessary. Metadata should be
; ignored by the expansion decision and kept on the atomicrmw.
define i64 @test_atomicrmw_add_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_add_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw add ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i64 %res
}
|
|
|
|
; add is supported over PCIe, so no expansion is necessary. Metadata should be
; ignored by the expansion decision and kept on the atomicrmw.
define i64 @test_atomicrmw_add_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_add_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw add ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; add is supported over PCIe, so no expansion is necessary. Metadata should be
; ignored by the expansion decision; both kinds stay on the atomicrmw.
define i64 @test_atomicrmw_add_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_add_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i64 [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw add ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw sub
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, sub is not supported over PCIe: the atomicrmw is
; expected to become a load followed by a cmpxchg retry loop.
define i64 @test_atomicrmw_sub_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst
  ret i64 %res
}
|
|
|
|
; sub with !amdgpu.no.fine.grained.memory: a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i64 %res
}
|
|
|
|
; sub with !amdgpu.no.remote.memory: a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; sub with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory:
; a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_sub_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = sub i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw sub ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw and
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe: the atomicrmw is
; expected to become a load followed by a cmpxchg retry loop.
define i64 @test_atomicrmw_and_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst
  ret i64 %res
}
|
|
|
|
; and with !amdgpu.no.fine.grained.memory: a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i64 %res
}
|
|
|
|
; and with !amdgpu.no.remote.memory: a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; and with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory:
; a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_and_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw nand
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported: the atomicrmw is expected
; to become a cmpxchg retry loop that computes nand as and followed by xor -1.
define i64 @test_atomicrmw_nand_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_nand_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
; COMMON-NEXT:    [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw nand ptr addrspace(1) %ptr, i64 %value seq_cst
  ret i64 %res
}
|
|
|
|
; nand with !amdgpu.no.fine.grained.memory: still expanded to a cmpxchg loop,
; and the metadata is expected on the resulting cmpxchg.
define i64 @test_atomicrmw_nand_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_nand_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
; COMMON-NEXT:    [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw nand ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i64 %res
}
|
|
|
|
; nand with !amdgpu.no.remote.memory: still expanded to a cmpxchg loop, and
; the metadata is expected on the resulting cmpxchg.
define i64 @test_atomicrmw_nand_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_nand_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
; COMMON-NEXT:    [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw nand ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; nand with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory:
; still expanded to a cmpxchg loop, and both metadata kinds are expected on the
; resulting cmpxchg.
define i64 @test_atomicrmw_nand_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_nand_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
; COMMON-NEXT:    [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw nand ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw or
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe: the atomicrmw is
; expected to become a load followed by a cmpxchg retry loop.
define i64 @test_atomicrmw_or_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst
  ret i64 %res
}
|
|
|
|
; or with !amdgpu.no.fine.grained.memory: a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i64 %res
}
|
|
|
|
; or with !amdgpu.no.remote.memory: a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; or with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory:
; a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_or_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = or i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw or ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw xor
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe: the atomicrmw is
; expected to become a load followed by a cmpxchg retry loop.
define i64 @test_atomicrmw_xor_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst
  ret i64 %res
}
|
|
|
|
; xor with !amdgpu.no.fine.grained.memory: a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i64 %res
}
|
|
|
|
; xor with !amdgpu.no.remote.memory: a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; xor with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory:
; a cmpxchg loop is still expected.
; Review note: unlike the nand tests, the cmpxchg check below does not list
; the metadata; confirm against regenerated checks.
define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_xor_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
; COMMON:       atomicrmw.start:
; COMMON-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT:    [[NEW:%.*]] = xor i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP2]], 0
; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON:       atomicrmw.end:
; COMMON-NEXT:    ret i64 [[NEWLOADED]]
;
  %res = atomicrmw xor ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw max
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Baseline case: system-scope seq_cst atomicrmw max on an i64 in addrspace(1)
; with no memory-kind metadata. The assertions expect a load followed by a
; cmpxchg retry loop whose new value is picked with a signed greater-than
; compare (icmp sgt + select).
define i64 @test_atomicrmw_max_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_max_i64_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw max ptr addrspace(1) %ptr, i64 %value seq_cst
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw max on an i64 in addrspace(1), tagged with
; !amdgpu.no.fine.grained.memory. The assertions still expect the cmpxchg retry
; loop (icmp sgt + select), with the metadata carried over to the cmpxchg.
define i64 @test_atomicrmw_max_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_max_i64_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw max ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw max on an i64 in addrspace(1), tagged with
; !amdgpu.no.remote.memory. The assertions still expect the cmpxchg retry loop
; (icmp sgt + select), with the metadata carried over to the cmpxchg.
define i64 @test_atomicrmw_max_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_max_i64_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw max ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw max on an i64 in addrspace(1), tagged with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The assertions
; still expect the cmpxchg retry loop (icmp sgt + select), with both metadata
; kinds carried over to the cmpxchg.
define i64 @test_atomicrmw_max_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_max_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw max ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw min
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Baseline case: system-scope seq_cst atomicrmw min on an i64 in addrspace(1)
; with no memory-kind metadata. The assertions expect a load followed by a
; cmpxchg retry loop whose new value is picked with a signed less-or-equal
; compare (icmp sle + select).
define i64 @test_atomicrmw_min_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_min_i64_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw min ptr addrspace(1) %ptr, i64 %value seq_cst
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw min on an i64 in addrspace(1), tagged with
; !amdgpu.no.fine.grained.memory. The assertions still expect the cmpxchg retry
; loop (icmp sle + select), with the metadata carried over to the cmpxchg.
define i64 @test_atomicrmw_min_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_min_i64_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw min ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw min on an i64 in addrspace(1), tagged with
; !amdgpu.no.remote.memory. The assertions still expect the cmpxchg retry loop
; (icmp sle + select), with the metadata carried over to the cmpxchg.
define i64 @test_atomicrmw_min_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_min_i64_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw min ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw min on an i64 in addrspace(1), tagged with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The assertions
; still expect the cmpxchg retry loop (icmp sle + select), with both metadata
; kinds carried over to the cmpxchg.
define i64 @test_atomicrmw_min_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_min_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw min ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw umax
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Baseline case: system-scope seq_cst atomicrmw umax on an i64 in addrspace(1)
; with no memory-kind metadata. The assertions expect a load followed by a
; cmpxchg retry loop whose new value is picked with an unsigned greater-than
; compare (icmp ugt + select).
define i64 @test_atomicrmw_umax_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_umax_i64_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umax ptr addrspace(1) %ptr, i64 %value seq_cst
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw umax on an i64 in addrspace(1), tagged with
; !amdgpu.no.fine.grained.memory. The assertions still expect the cmpxchg retry
; loop (icmp ugt + select), with the metadata carried over to the cmpxchg.
define i64 @test_atomicrmw_umax_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_umax_i64_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umax ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw umax on an i64 in addrspace(1), tagged with
; !amdgpu.no.remote.memory. The assertions still expect the cmpxchg retry loop
; (icmp ugt + select), with the metadata carried over to the cmpxchg.
define i64 @test_atomicrmw_umax_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_umax_i64_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umax ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw umax on an i64 in addrspace(1), tagged with
; both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The
; assertions still expect the cmpxchg retry loop (icmp ugt + select), with both
; metadata kinds carried over to the cmpxchg.
define i64 @test_atomicrmw_umax_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_umax_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umax ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw umin
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Baseline case: system-scope seq_cst atomicrmw umin on an i64 in addrspace(1)
; with no memory-kind metadata. The assertions expect a load followed by a
; cmpxchg retry loop whose new value is picked with an unsigned less-or-equal
; compare (icmp ule + select).
define i64 @test_atomicrmw_umin_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_umin_i64_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umin ptr addrspace(1) %ptr, i64 %value seq_cst
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw umin on an i64 in addrspace(1), tagged with
; !amdgpu.no.fine.grained.memory. The assertions still expect the cmpxchg retry
; loop (icmp ule + select), with the metadata carried over to the cmpxchg.
define i64 @test_atomicrmw_umin_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_umin_i64_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umin ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw umin on an i64 in addrspace(1), tagged with
; !amdgpu.no.remote.memory. The assertions still expect the cmpxchg retry loop
; (icmp ule + select), with the metadata carried over to the cmpxchg.
define i64 @test_atomicrmw_umin_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_umin_i64_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umin ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw umin on an i64 in addrspace(1), tagged with
; both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The
; assertions still expect the cmpxchg retry loop (icmp ule + select), with both
; metadata kinds carried over to the cmpxchg.
define i64 @test_atomicrmw_umin_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_umin_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i64 [[LOADED]], i64 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umin ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw uinc_wrap
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Baseline case: system-scope seq_cst atomicrmw uinc_wrap on an i64 in
; addrspace(1) with no memory-kind metadata. The assertions expect a load
; followed by a cmpxchg retry loop whose new value is 0 when the loaded value
; is unsigned >= %value, and loaded + 1 otherwise.
define i64 @test_atomicrmw_uinc_wrap_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw uinc_wrap on an i64 in addrspace(1), tagged
; with !amdgpu.no.fine.grained.memory. The assertions still expect the cmpxchg
; retry loop (new value is 0 when loaded is unsigned >= %value, else loaded + 1).
; Review note (unconfirmed): the cmpxchg assertion here does not mention the
; metadata, while the max/min/umax/umin tests in this file assert that it is
; kept on the cmpxchg. FileCheck accepts extra trailing text on a matched line,
; so this passes either way; confirm whether the looser check is intended.
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw uinc_wrap on an i64 in addrspace(1), tagged
; with !amdgpu.no.remote.memory. The assertions still expect the cmpxchg retry
; loop (new value is 0 when loaded is unsigned >= %value, else loaded + 1).
; Review note (unconfirmed): the cmpxchg assertion here does not mention the
; metadata, while the max/min/umax/umin tests in this file assert that it is
; kept on the cmpxchg. FileCheck accepts extra trailing text on a matched line,
; so this passes either way; confirm whether the looser check is intended.
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw uinc_wrap on an i64 in addrspace(1), tagged
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The
; assertions still expect the cmpxchg retry loop (new value is 0 when loaded is
; unsigned >= %value, else loaded + 1).
; Review note (unconfirmed): the cmpxchg assertion here does not mention either
; metadata kind, while the max/min/umax/umin tests in this file assert that the
; metadata is kept on the cmpxchg. FileCheck accepts extra trailing text on a
; matched line, so this passes either way; confirm whether that is intended.
define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_uinc_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = add i64 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i64 0, i64 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP4]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw udec_wrap
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Baseline case: system-scope seq_cst atomicrmw udec_wrap on an i64 in
; addrspace(1) with no memory-kind metadata. The assertions expect a load
; followed by a cmpxchg retry loop whose new value is %value when the loaded
; value is 0 or unsigned > %value, and loaded - 1 otherwise.
define i64 @test_atomicrmw_udec_wrap_i64_global_system(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
|
|
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst
|
|
ret i64 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw udec_wrap on an i64 in addrspace(1), tagged
; with !amdgpu.no.fine.grained.memory. The assertions still expect the cmpxchg
; retry loop (new value is %value when loaded is 0 or unsigned > %value, else
; loaded - 1).
; Review note (unconfirmed): the cmpxchg assertion here does not mention the
; metadata, while the max/min/umax/umin tests in this file assert that it is
; kept on the cmpxchg. FileCheck accepts extra trailing text on a matched line,
; so this passes either way; confirm whether the looser check is intended.
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %value) {
|
|
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
|
|
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i64 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i64 %res
|
|
}
|
|
|
|
; seq_cst udec_wrap on an i64 in addrspace(1) with no syncscope, annotated
; only with !amdgpu.no.remote.memory. The COMMON checks (shared by every RUN
; line) expect the operation to be rewritten into a cmpxchg loop whose new
; value is
;   (loaded == 0 || loaded u> value) ? value : loaded - 1
; i.e. the metadata does not currently suppress the expansion.
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; seq_cst udec_wrap on an i64 in addrspace(1) with no syncscope, annotated
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; The COMMON checks (shared by every RUN line) expect the operation to be
; rewritten into a cmpxchg loop whose new value is
;   (loaded == 0 || loaded u> value) ? value : loaded - 1
; i.e. even the combination of both annotations does not currently suppress
; the expansion.
define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i64 %value) {
; COMMON-LABEL: define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = sub i64 [[LOADED]], 1
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[LOADED]], 0
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[LOADED]], [[VALUE]]
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i64 [[VALUE]], i64 [[TMP2]]
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst, align 8
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP6]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP6]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret i64 [[NEWLOADED]]
;
  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i64 %res
}
|
|
|
|
; Empty metadata node used as the operand of the !amdgpu.no.fine.grained.memory
; and !amdgpu.no.remote.memory annotations on the atomicrmw instructions.
!0 = !{}
;.
; GFX803: [[META0]] = !{}
;.
; GFX906: [[META0]] = !{}
;.
; GFX908: [[META0]] = !{}
;.
; GFX90A: [[META0]] = !{}
;.
; GFX942: [[META0]] = !{}
;.
; GFX10: [[META0]] = !{}
;.
; GFX11: [[META0]] = !{}
;.
; GFX12: [[META0]] = !{}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
; GFX12: {{.*}}
; GFX803: {{.*}}
; GFX906: {{.*}}
; GFX908: {{.*}}
; GFX90A: {{.*}}
; GFX942: {{.*}}
|