
System scope atomics need to use cmpxchg loops if we know nothing about the allocation the address is from. Commit aea5980e26e6a87dab9f8acb10eb3a59dd143cb1 started this; this change expands the set to cover the remaining integer operations. xchg and add are not expanded, since those should theoretically work over PCIe. This is a pre-commit which will introduce performance regressions. Subsequent changes will add handling of new atomicrmw metadata, which will avoid the expansion. Note that this still isn't conservative enough; we do need to expand some device scope atomics if the memory is in fine-grained remote memory.
1069 lines
64 KiB
LLVM
1069 lines
64 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX803 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw xchg
|
|
;---------------------------------------------------------------------
|
|
|
|
; System-scope (no syncscope) seq_cst atomicrmw xchg on an i32 in global memory
; (addrspace(1)). xchg is supported over PCIe, so no expansion is necessary: the
; checks expect atomic-expand to leave the atomicrmw in place, printed with its
; implied "align 4", on every tested subtarget.
|
|
define i32 @test_atomicrmw_xchg_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_xchg_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
|
|
; COMMON-NEXT: ret i32 [[RES]]
|
|
;
|
|
%res = atomicrmw xchg ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw xchg on a global i32, annotated with
; !amdgpu.no.fine.grained.memory. xchg is supported over PCIe, so no expansion
; is necessary and the metadata should be ignored for that decision: the checks
; expect the atomicrmw unchanged with the annotation still attached.
|
|
define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
|
|
; COMMON-NEXT: ret i32 [[RES]]
|
|
;
|
|
%res = atomicrmw xchg ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw xchg on a global i32, annotated with
; !amdgpu.no.remote.memory. xchg is supported over PCIe, so no expansion is
; necessary and the metadata should be ignored for that decision: the checks
; expect the atomicrmw unchanged with the annotation still attached.
|
|
define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: ret i32 [[RES]]
|
|
;
|
|
%res = atomicrmw xchg ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw xchg on a global i32, annotated with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. xchg is supported
; over PCIe, so no expansion is necessary and the metadata should be ignored
; for that decision: the checks expect the atomicrmw unchanged with both
; annotations still attached.
|
|
define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: ret i32 [[RES]]
|
|
;
|
|
%res = atomicrmw xchg ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw add
|
|
;---------------------------------------------------------------------
|
|
|
|
; System-scope (no syncscope) seq_cst atomicrmw add on an i32 in global memory
; (addrspace(1)). add is supported over PCIe, so no expansion is necessary: the
; checks expect atomic-expand to leave the atomicrmw in place, printed with its
; implied "align 4", on every tested subtarget.
|
|
define i32 @test_atomicrmw_add_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_add_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
|
|
; COMMON-NEXT: ret i32 [[RES]]
|
|
;
|
|
%res = atomicrmw add ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw add on a global i32, annotated with
; !amdgpu.no.fine.grained.memory. add is supported over PCIe, so no expansion
; is necessary and the metadata should be ignored for that decision: the checks
; expect the atomicrmw unchanged with the annotation still attached.
|
|
define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: ret i32 [[RES]]
|
|
;
|
|
%res = atomicrmw add ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw add on a global i32, annotated with
; !amdgpu.no.remote.memory. add is supported over PCIe, so no expansion is
; necessary and the metadata should be ignored for that decision: the checks
; expect the atomicrmw unchanged with the annotation still attached.
|
|
define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: ret i32 [[RES]]
|
|
;
|
|
%res = atomicrmw add ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw add on a global i32, annotated with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. add is supported
; over PCIe, so no expansion is necessary and the metadata should be ignored
; for that decision: the checks expect the atomicrmw unchanged with both
; annotations still attached.
|
|
define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: ret i32 [[RES]]
|
|
;
|
|
%res = atomicrmw add ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw sub
|
|
;---------------------------------------------------------------------
|
|
|
|
; System-scope (no syncscope) seq_cst atomicrmw sub on an i32 in global memory
; (addrspace(1)). Expansion is necessary because sub is not supported over
; PCIe: the checks expect an initial load followed by an atomicrmw.start loop
; that recomputes the sub and retries a seq_cst cmpxchg until it succeeds, then
; returns the value extracted from the successful cmpxchg.
|
|
define i32 @test_atomicrmw_sub_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw sub on a global i32, annotated with
; !amdgpu.no.fine.grained.memory. The checks expect the same load + cmpxchg
; retry loop as for an unannotated sub, i.e. the annotation does not avoid the
; expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw sub on a global i32, annotated with
; !amdgpu.no.remote.memory. The checks expect the same load + cmpxchg retry
; loop as for an unannotated sub, i.e. the annotation does not avoid the
; expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw sub on a global i32, annotated with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The checks expect
; the same load + cmpxchg retry loop as for an unannotated sub, i.e. the
; annotations do not avoid the expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw and
|
|
;---------------------------------------------------------------------
|
|
|
|
; System-scope (no syncscope) seq_cst atomicrmw and on an i32 in global memory
; (addrspace(1)). Expansion is necessary because the operation is not supported
; over PCIe: the checks expect an initial load followed by an atomicrmw.start
; loop that recomputes the and and retries a seq_cst cmpxchg until it succeeds,
; then returns the value extracted from the successful cmpxchg.
|
|
define i32 @test_atomicrmw_and_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw and on a global i32, annotated with
; !amdgpu.no.fine.grained.memory. The checks expect the same load + cmpxchg
; retry loop as for an unannotated and, i.e. the annotation does not avoid the
; expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw and on a global i32, annotated with
; !amdgpu.no.remote.memory. The checks expect the same load + cmpxchg retry
; loop as for an unannotated and, i.e. the annotation does not avoid the
; expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw and on a global i32, annotated with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The checks expect
; the same load + cmpxchg retry loop as for an unannotated and, i.e. the
; annotations do not avoid the expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw nand
|
|
;---------------------------------------------------------------------
|
|
|
|
; System-scope (no syncscope) seq_cst atomicrmw nand on an i32 in global memory
; (addrspace(1)). Expansion is necessary because the operation is not
; supported: the checks expect an initial load followed by an atomicrmw.start
; loop that computes and(loaded, value), inverts it with xor -1, and retries a
; seq_cst cmpxchg until it succeeds, then returns the value extracted from the
; successful cmpxchg.
|
|
define i32 @test_atomicrmw_nand_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_nand_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw nand ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw nand on a global i32, annotated with
; !amdgpu.no.fine.grained.memory. The checks expect the load + cmpxchg retry
; loop (and, then xor with -1), with the annotation carried over onto the
; generated cmpxchg.
define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw nand ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw nand on a global i32, annotated with
; !amdgpu.no.remote.memory. The checks expect the load + cmpxchg retry loop
; (and, then xor with -1), with the annotation carried over onto the generated
; cmpxchg.
define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw nand ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw nand on a global i32, annotated with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The checks expect
; the load + cmpxchg retry loop (and, then xor with -1), with both annotations
; carried over onto the generated cmpxchg.
define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw nand ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw or
|
|
;---------------------------------------------------------------------
|
|
|
|
; System-scope (no syncscope) seq_cst atomicrmw or on an i32 in global memory
; (addrspace(1)). Expansion is necessary because the operation is not supported
; over PCIe: the checks expect an initial load followed by an atomicrmw.start
; loop that recomputes the or and retries a seq_cst cmpxchg until it succeeds,
; then returns the value extracted from the successful cmpxchg.
|
|
define i32 @test_atomicrmw_or_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw or on a global i32, annotated with
; !amdgpu.no.fine.grained.memory. The checks expect the same load + cmpxchg
; retry loop as for an unannotated or, i.e. the annotation does not avoid the
; expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw or on a global i32, annotated with
; !amdgpu.no.remote.memory. The checks expect the same load + cmpxchg retry
; loop as for an unannotated or, i.e. the annotation does not avoid the
; expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw or on a global i32, annotated with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The checks expect
; the same load + cmpxchg retry loop as for an unannotated or, i.e. the
; annotations do not avoid the expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw xor
|
|
;---------------------------------------------------------------------
|
|
|
|
; System-scope (no syncscope) seq_cst atomicrmw xor on an i32 in global memory
; (addrspace(1)). Expansion is necessary because the operation is not supported
; over PCIe: the checks expect an initial load followed by an atomicrmw.start
; loop that recomputes the xor and retries a seq_cst cmpxchg until it succeeds,
; then returns the value extracted from the successful cmpxchg.
|
|
define i32 @test_atomicrmw_xor_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw xor on a global i32, annotated with
; !amdgpu.no.fine.grained.memory. The checks expect the same load + cmpxchg
; retry loop as for an unannotated xor, i.e. the annotation does not avoid the
; expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw xor on a global i32, annotated with
; !amdgpu.no.remote.memory. The checks expect the same load + cmpxchg retry
; loop as for an unannotated xor, i.e. the annotation does not avoid the
; expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; System-scope seq_cst atomicrmw xor on a global i32, annotated with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The checks expect
; the same load + cmpxchg retry loop as for an unannotated xor, i.e. the
; annotations do not avoid the expansion here.
; NOTE(review): the expected cmpxchg carries no !amdgpu.* metadata, whereas the
; annotated nand tests in this file expect it to be carried over onto the
; cmpxchg -- confirm by regenerating the checks.
define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP2]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw max
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Test: seq_cst atomicrmw max (signed) on an addrspace(1) pointer, with no
; syncscope qualifier and no metadata attached.
; The COMMON checks expect a compare-exchange retry loop: an initial load,
; `icmp sgt` + `select` to form the new value, then `cmpxchg`, branching back
; to atomicrmw.start until the success bit is set. The expected return value
; is field 0 of the cmpxchg result.
define i32 @test_atomicrmw_max_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_max_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw max ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw max (signed) on an addrspace(1) pointer, tagged with
; !amdgpu.no.fine.grained.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp sgt` +
; `select`, `cmpxchg`) and require !amdgpu.no.fine.grained.memory to be
; present on the generated cmpxchg.
define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw max ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw max (signed) on an addrspace(1) pointer, tagged with
; !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp sgt` +
; `select`, `cmpxchg`) and require !amdgpu.no.remote.memory to be present on
; the generated cmpxchg.
define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw max ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw max (signed) on an addrspace(1) pointer, tagged with
; both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp sgt` +
; `select`, `cmpxchg`) and require both metadata kinds to be present on the
; generated cmpxchg.
define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw max ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw min
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Test: seq_cst atomicrmw min (signed) on an addrspace(1) pointer, with no
; syncscope qualifier and no metadata attached.
; The COMMON checks expect a compare-exchange retry loop: an initial load,
; `icmp sle` + `select` to form the new value, then `cmpxchg`, branching back
; to atomicrmw.start until the success bit is set. The expected return value
; is field 0 of the cmpxchg result.
define i32 @test_atomicrmw_min_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_min_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw min ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw min (signed) on an addrspace(1) pointer, tagged with
; !amdgpu.no.fine.grained.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp sle` +
; `select`, `cmpxchg`) and require !amdgpu.no.fine.grained.memory to be
; present on the generated cmpxchg.
define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw min ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw min (signed) on an addrspace(1) pointer, tagged with
; !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp sle` +
; `select`, `cmpxchg`) and require !amdgpu.no.remote.memory to be present on
; the generated cmpxchg.
define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw min ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw min (signed) on an addrspace(1) pointer, tagged with
; both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp sle` +
; `select`, `cmpxchg`) and require both metadata kinds to be present on the
; generated cmpxchg.
define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw min ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw umax
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Test: seq_cst atomicrmw umax (unsigned) on an addrspace(1) pointer, with no
; syncscope qualifier and no metadata attached.
; The COMMON checks expect a compare-exchange retry loop: an initial load,
; `icmp ugt` + `select` to form the new value, then `cmpxchg`, branching back
; to atomicrmw.start until the success bit is set. The expected return value
; is field 0 of the cmpxchg result.
define i32 @test_atomicrmw_umax_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_umax_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umax ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw umax (unsigned) on an addrspace(1) pointer, tagged
; with !amdgpu.no.fine.grained.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp ugt` +
; `select`, `cmpxchg`) and require !amdgpu.no.fine.grained.memory to be
; present on the generated cmpxchg.
define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umax ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw umax (unsigned) on an addrspace(1) pointer, tagged
; with !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp ugt` +
; `select`, `cmpxchg`) and require !amdgpu.no.remote.memory to be present on
; the generated cmpxchg.
define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umax ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw umax (unsigned) on an addrspace(1) pointer, tagged
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp ugt` +
; `select`, `cmpxchg`) and require both metadata kinds to be present on the
; generated cmpxchg.
define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umax ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw umin
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Test: seq_cst atomicrmw umin (unsigned) on an addrspace(1) pointer, with no
; syncscope qualifier and no metadata attached.
; The COMMON checks expect a compare-exchange retry loop: an initial load,
; `icmp ule` + `select` to form the new value, then `cmpxchg`, branching back
; to atomicrmw.start until the success bit is set. The expected return value
; is field 0 of the cmpxchg result.
define i32 @test_atomicrmw_umin_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_umin_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umin ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw umin (unsigned) on an addrspace(1) pointer, tagged
; with !amdgpu.no.fine.grained.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp ule` +
; `select`, `cmpxchg`) and require !amdgpu.no.fine.grained.memory to be
; present on the generated cmpxchg.
define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umin ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw umin (unsigned) on an addrspace(1) pointer, tagged
; with !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp ule` +
; `select`, `cmpxchg`) and require !amdgpu.no.remote.memory to be present on
; the generated cmpxchg.
define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umin ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw umin (unsigned) on an addrspace(1) pointer, tagged
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop (load, `icmp ule` +
; `select`, `cmpxchg`) and require both metadata kinds to be present on the
; generated cmpxchg.
define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
|
|
; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw umin ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw uinc_wrap
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Test: seq_cst atomicrmw uinc_wrap on an addrspace(1) pointer, with no
; syncscope qualifier and no metadata attached.
; The COMMON checks expect a compare-exchange retry loop: an initial load,
; then `add 1`, `icmp uge` against %value and a `select` that yields 0 when
; the comparison holds (otherwise the incremented value), followed by
; `cmpxchg`, branching back to atomicrmw.start until the success bit is set.
define i32 @test_atomicrmw_uinc_wrap_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw uinc_wrap on an addrspace(1) pointer, tagged with
; !amdgpu.no.fine.grained.memory.
; The COMMON checks expect the compare-exchange retry loop: load, `add 1`,
; `icmp uge` against %value, `select` of 0 or the incremented value, then
; `cmpxchg` until the success bit is set.
; NOTE(review): the cmpxchg check stops at `align 4`, and FileCheck is run
; without --match-full-lines, so it matches with or without metadata on the
; cmpxchg. The max/min/umax/umin variants in this file pin the metadata
; explicitly -- confirm against current opt output and regenerate if stale.
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw uinc_wrap on an addrspace(1) pointer, tagged with
; !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop: load, `add 1`,
; `icmp uge` against %value, `select` of 0 or the incremented value, then
; `cmpxchg` until the success bit is set.
; NOTE(review): the cmpxchg check stops at `align 4`, and FileCheck is run
; without --match-full-lines, so it matches with or without metadata on the
; cmpxchg. The max/min/umax/umin variants in this file pin the metadata
; explicitly -- confirm against current opt output and regenerate if stale.
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw uinc_wrap on an addrspace(1) pointer, tagged with
; both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; The COMMON checks expect the compare-exchange retry loop: load, `add 1`,
; `icmp uge` against %value, `select` of 0 or the incremented value, then
; `cmpxchg` until the success bit is set.
; NOTE(review): the cmpxchg check stops at `align 4`, and FileCheck is run
; without --match-full-lines, so it matches with or without metadata on the
; cmpxchg. The max/min/umax/umin variants in this file pin the metadata
; explicitly -- confirm against current opt output and regenerate if stale.
define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = add i32 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp uge i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
;---------------------------------------------------------------------
|
|
; atomicrmw udec_wrap
|
|
;---------------------------------------------------------------------
|
|
|
|
; expansion is necessary, operation not supported over PCIe
|
|
; Test: seq_cst atomicrmw udec_wrap on an addrspace(1) pointer, with no
; syncscope qualifier and no metadata attached.
; The COMMON checks expect a compare-exchange retry loop: an initial load,
; then `sub 1`, an `icmp eq 0` OR-ed with `icmp ugt` against %value, and a
; `select` that yields %value when either comparison holds (otherwise the
; decremented value), followed by `cmpxchg`, branching back to
; atomicrmw.start until the success bit is set.
define i32 @test_atomicrmw_udec_wrap_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
|
|
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst
|
|
ret i32 %res
|
|
}
|
|
|
|
; Test: seq_cst atomicrmw udec_wrap on an addrspace(1) pointer, tagged with
; !amdgpu.no.fine.grained.memory.
; The COMMON checks expect the compare-exchange retry loop: load, `sub 1`,
; `icmp eq 0` OR-ed with `icmp ugt` against %value, `select` of %value or the
; decremented value, then `cmpxchg` until the success bit is set.
; NOTE(review): the cmpxchg check stops at `align 4`, and FileCheck is run
; without --match-full-lines, so it matches with or without metadata on the
; cmpxchg. The max/min/umax/umin variants in this file pin the metadata
; explicitly -- confirm against current opt output and regenerate if stale.
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
|
|
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; udec_wrap on an addrspace(1) pointer with no syncscope operand, annotated
; with !amdgpu.no.remote.memory only. The assertions below use the shared
; COMMON prefix, so every RUN line expects the same result: the atomicrmw is
; still replaced by a cmpxchg loop that computes
;   new = (loaded == 0 || loaded u> value) ? value : loaded - 1
; and returns the value extracted from the final cmpxchg.
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
|
|
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; udec_wrap on an addrspace(1) pointer with no syncscope operand, annotated
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. The
; assertions below use the shared COMMON prefix, so every RUN line expects the
; same result: even with both annotations the atomicrmw is replaced by a
; cmpxchg loop that computes
;   new = (loaded == 0 || loaded u> value) ? value : loaded - 1
; and returns the value extracted from the final cmpxchg.
define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
|
|
; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
|
|
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
|
|
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
|
|
; COMMON: atomicrmw.start:
|
|
; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
|
|
; COMMON-NEXT: [[TMP2:%.*]] = sub i32 [[LOADED]], 1
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i32 [[LOADED]], 0
|
|
; COMMON-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
|
|
; COMMON-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
|
|
; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 [[VALUE]], i32 [[TMP2]]
|
|
; COMMON-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
|
|
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
|
|
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
|
|
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
|
; COMMON: atomicrmw.end:
|
|
; COMMON-NEXT: ret i32 [[NEWLOADED]]
|
|
;
|
|
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
|
|
ret i32 %res
|
|
}
|
|
|
|
; Empty metadata node used as the operand of the !amdgpu.no.fine.grained.memory
; and !amdgpu.no.remote.memory attachments on the atomicrmw instructions above.
!0 = !{}
|
|
;.
|
|
; GFX803: [[META0]] = !{}
|
|
;.
|
|
; GFX906: [[META0]] = !{}
|
|
;.
|
|
; GFX908: [[META0]] = !{}
|
|
;.
|
|
; GFX90A: [[META0]] = !{}
|
|
;.
|
|
; GFX942: [[META0]] = !{}
|
|
;.
|
|
; GFX10: [[META0]] = !{}
|
|
;.
|
|
; GFX11: [[META0]] = !{}
|
|
;.
|
|
; GFX12: [[META0]] = !{}
|
|
;.
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; GFX10: {{.*}}
|
|
; GFX11: {{.*}}
|
|
; GFX12: {{.*}}
|
|
; GFX803: {{.*}}
|
|
; GFX906: {{.*}}
|
|
; GFX908: {{.*}}
|
|
; GFX90A: {{.*}}
|
|
; GFX942: {{.*}}
|