[libclc] Use CLC atomic functions for legacy OpenCL atom/atomic builtins (#168325)
Main changes:
* OpenCL legacy atom/atomic builtins now call CLC atomic functions
(which use Clang __scoped_atomic_*), replacing previous Clang __sync_*
functions.
* Change memory order from seq_cst to relaxed; keep device scope (spec
permits broader than workgroup). LLVM IR for _Z8atom_decPU3AS1Vi in
amdgcn--amdhsa.bc:
Before:
%2 = atomicrmw volatile sub ptr addrspace(1) %0, i32 1
syncscope("agent") seq_cst
After:
%2 = atomicrmw volatile sub ptr addrspace(1) %0, i32 1
syncscope("agent") monotonic
* Also adds OpenCL 1.0 atom_* variants without volatile on the pointer.
They are added for backward compatibility.
This commit is contained in:
parent
f7f41350b4
commit
f38cf01fc8
@ -6,9 +6,6 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clcfunc.h>
|
||||
#include <clc/clctypes.h>
|
||||
|
||||
#define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION(volatile ADDRSPACE TYPE *, TYPE);
|
||||
|
||||
|
||||
@ -6,9 +6,6 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clcfunc.h>
|
||||
#include <clc/clctypes.h>
|
||||
|
||||
#define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION(volatile ADDRSPACE TYPE *, TYPE);
|
||||
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.ll
|
||||
mem_fence/fence.cl
|
||||
synchronization/barrier.cl
|
||||
workitem/get_global_offset.cl
|
||||
|
||||
@ -1,55 +0,0 @@
|
||||
;;===----------------------------------------------------------------------===;;
|
||||
;
|
||||
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
; See https://llvm.org/LICENSE.txt for license information.
|
||||
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
;
|
||||
;;===----------------------------------------------------------------------===;;
|
||||
|
||||
define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
@ -6,32 +6,35 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_add.h>
|
||||
#include <clc/opencl/atomic/atom_add.h>
|
||||
#include <clc/opencl/atomic/atomic_add.h>
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
#define __CLC_ATOMIC_OP add
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE global
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_global_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_base_atomics
|
||||
#define __CLC_ATOMIC_OP add
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE local
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_add_8(p, val); \
|
||||
return __clc_atomic_fetch_add(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_add(AS TYPE *p, TYPE val) { \
|
||||
return atom_add((volatile AS TYPE *)p, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
__CLC_IMPL(global, int)
|
||||
__CLC_IMPL(global, unsigned int)
|
||||
#endif // cl_khr_global_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_base_atomics
|
||||
__CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_base_atomics
|
||||
|
||||
@ -6,32 +6,35 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_and.h>
|
||||
#include <clc/opencl/atomic/atom_and.h>
|
||||
#include <clc/opencl/atomic/atomic_and.h>
|
||||
|
||||
#ifdef cl_khr_global_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP and
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE global
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_global_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP and
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE local
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_local_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_int64_extended_atomics
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_and_8(p, val); \
|
||||
return __clc_atomic_fetch_and(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_and(AS TYPE *p, TYPE val) { \
|
||||
return atom_and((volatile AS TYPE *)p, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_extended_atomics
|
||||
__CLC_IMPL(global, int)
|
||||
__CLC_IMPL(global, unsigned int)
|
||||
#endif // cl_khr_global_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_extended_atomics
|
||||
__CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_int64_extended_atomics
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_extended_atomics
|
||||
|
||||
@ -6,13 +6,20 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_compare_exchange.h>
|
||||
#include <clc/opencl/atomic/atom_cmpxchg.h>
|
||||
#include <clc/opencl/atomic/atomic_cmpxchg.h>
|
||||
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \
|
||||
TYPE val) { \
|
||||
return atomic_cmpxchg(p, cmp, val); \
|
||||
return __clc_atomic_compare_exchange(p, cmp, val, __ATOMIC_RELAXED, \
|
||||
__ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(AS TYPE *p, TYPE cmp, TYPE val) { \
|
||||
return atom_cmpxchg((volatile AS TYPE *)p, cmp, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
@ -24,20 +31,11 @@ __CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \
|
||||
TYPE val) { \
|
||||
return __sync_val_compare_and_swap_8(p, cmp, val); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_base_atomics
|
||||
|
||||
@ -6,13 +6,17 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_dec.h>
|
||||
#include <clc/opencl/atomic/atom_dec.h>
|
||||
#include <clc/opencl/atomic/atom_sub.h>
|
||||
#include <clc/opencl/atomic/atomic_dec.h>
|
||||
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) { \
|
||||
return atomic_dec(p); \
|
||||
return __clc_atomic_dec(p, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(AS TYPE *p) { \
|
||||
return atom_dec((volatile AS TYPE *)p); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
@ -24,19 +28,11 @@ __CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) { \
|
||||
return atom_sub(p, (TYPE)1); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_base_atomics
|
||||
|
||||
@ -6,13 +6,17 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/atomic/atom_add.h>
|
||||
#include <clc/atomic/clc_atomic_inc.h>
|
||||
#include <clc/opencl/atomic/atom_inc.h>
|
||||
#include <clc/opencl/atomic/atomic_inc.h>
|
||||
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) { \
|
||||
return atomic_inc(p); \
|
||||
return __clc_atomic_inc(p, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(AS TYPE *p) { \
|
||||
return atom_inc((volatile AS TYPE *)p); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
@ -24,19 +28,11 @@ __CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) { \
|
||||
return atom_add(p, (TYPE)1); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_base_atomics
|
||||
|
||||
@ -1,23 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
#include <clc/utils.h>
|
||||
|
||||
#define __CLC_ATOM_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP)( \
|
||||
volatile AS TYPE * p, TYPE val) { \
|
||||
return __CLC_XCONCAT(atomic_, __CLC_ATOMIC_OP)(p, val); \
|
||||
}
|
||||
|
||||
__CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, int)
|
||||
__CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, uint)
|
||||
|
||||
#undef __CLC_ATOM_IMPL
|
||||
#undef __CLC_ATOMIC_OP
|
||||
#undef __CLC_ATOMIC_ADDRESS_SPACE
|
||||
@ -6,40 +6,35 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_max.h>
|
||||
#include <clc/opencl/atomic/atom_max.h>
|
||||
#include <clc/opencl/atomic/atomic_max.h>
|
||||
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \
|
||||
return __clc_atomic_fetch_max(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_max(AS TYPE *p, TYPE val) { \
|
||||
return atom_max((volatile AS TYPE *)p, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP max
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE global
|
||||
#include "atom_int32_binary.inc"
|
||||
__CLC_IMPL(global, int)
|
||||
__CLC_IMPL(global, unsigned int)
|
||||
#endif // cl_khr_global_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP max
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE local
|
||||
#include "atom_int32_binary.inc"
|
||||
__CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_int64_extended_atomics
|
||||
|
||||
unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long);
|
||||
unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long);
|
||||
unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long *,
|
||||
unsigned long);
|
||||
unsigned long
|
||||
__clc__sync_fetch_and_umax_global_8(volatile global unsigned long *,
|
||||
unsigned long);
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE, OP) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \
|
||||
return __clc__sync_fetch_and_##OP##_##AS##_8(p, val); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(global, long, max)
|
||||
__CLC_IMPL(global, unsigned long, umax)
|
||||
__CLC_IMPL(local, long, max)
|
||||
__CLC_IMPL(local, unsigned long, umax)
|
||||
#undef __CLC_IMPL
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
|
||||
#endif // cl_khr_int64_extended_atomics
|
||||
|
||||
@ -6,40 +6,35 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_min.h>
|
||||
#include <clc/opencl/atomic/atom_min.h>
|
||||
#include <clc/opencl/atomic/atomic_min.h>
|
||||
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \
|
||||
return __clc_atomic_fetch_min(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_min(AS TYPE *p, TYPE val) { \
|
||||
return atom_min((volatile AS TYPE *)p, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP min
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE global
|
||||
#include "atom_int32_binary.inc"
|
||||
__CLC_IMPL(global, int)
|
||||
__CLC_IMPL(global, unsigned int)
|
||||
#endif // cl_khr_global_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP min
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE local
|
||||
#include "atom_int32_binary.inc"
|
||||
__CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_int64_extended_atomics
|
||||
|
||||
unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long);
|
||||
unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long);
|
||||
unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long *,
|
||||
unsigned long);
|
||||
unsigned long
|
||||
__clc__sync_fetch_and_umin_global_8(volatile global unsigned long *,
|
||||
unsigned long);
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE, OP) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \
|
||||
return __clc__sync_fetch_and_##OP##_##AS##_8(p, val); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(global, long, min)
|
||||
__CLC_IMPL(global, unsigned long, umin)
|
||||
__CLC_IMPL(local, long, min)
|
||||
__CLC_IMPL(local, unsigned long, umin)
|
||||
#undef __CLC_IMPL
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
|
||||
#endif // cl_khr_int64_extended_atomics
|
||||
|
||||
@ -6,32 +6,35 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_or.h>
|
||||
#include <clc/opencl/atomic/atom_or.h>
|
||||
#include <clc/opencl/atomic/atomic_or.h>
|
||||
|
||||
#ifdef cl_khr_global_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP or
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE global
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_global_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP or
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE local
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_local_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_int64_extended_atomics
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_or_8(p, val); \
|
||||
return __clc_atomic_fetch_or(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_or(AS TYPE *p, TYPE val) { \
|
||||
return atom_or((volatile AS TYPE *)p, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_extended_atomics
|
||||
__CLC_IMPL(global, int)
|
||||
__CLC_IMPL(global, unsigned int)
|
||||
#endif // cl_khr_global_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_extended_atomics
|
||||
__CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_int64_extended_atomics
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_extended_atomics
|
||||
|
||||
@ -6,32 +6,35 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_sub.h>
|
||||
#include <clc/opencl/atomic/atom_sub.h>
|
||||
#include <clc/opencl/atomic/atomic_sub.h>
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
#define __CLC_ATOMIC_OP sub
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE global
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_global_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_base_atomics
|
||||
#define __CLC_ATOMIC_OP sub
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE local
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_sub_8(p, val); \
|
||||
return __clc_atomic_fetch_sub(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(AS TYPE *p, TYPE val) { \
|
||||
return atom_sub((volatile AS TYPE *)p, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
__CLC_IMPL(global, int)
|
||||
__CLC_IMPL(global, unsigned int)
|
||||
#endif // cl_khr_global_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_base_atomics
|
||||
__CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_base_atomics
|
||||
|
||||
@ -6,32 +6,35 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_exchange.h>
|
||||
#include <clc/opencl/atomic/atom_xchg.h>
|
||||
#include <clc/opencl/atomic/atomic_xchg.h>
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
#define __CLC_ATOMIC_OP xchg
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE global
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_global_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_base_atomics
|
||||
#define __CLC_ATOMIC_OP xchg
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE local
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_swap_8(p, val); \
|
||||
return __clc_atomic_exchange(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(AS TYPE *p, TYPE val) { \
|
||||
return atom_xchg((volatile AS TYPE *)p, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_base_atomics
|
||||
__CLC_IMPL(global, int)
|
||||
__CLC_IMPL(global, unsigned int)
|
||||
#endif // cl_khr_global_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_base_atomics
|
||||
__CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_base_atomics
|
||||
|
||||
#ifdef cl_khr_int64_base_atomics
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_base_atomics
|
||||
|
||||
@ -6,32 +6,35 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_xor.h>
|
||||
#include <clc/opencl/atomic/atom_xor.h>
|
||||
#include <clc/opencl/atomic/atomic_xor.h>
|
||||
|
||||
#ifdef cl_khr_global_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP xor
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE global
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_global_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_extended_atomics
|
||||
#define __CLC_ATOMIC_OP xor
|
||||
#define __CLC_ATOMIC_ADDRESS_SPACE local
|
||||
#include "atom_int32_binary.inc"
|
||||
#endif // cl_khr_local_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_int64_extended_atomics
|
||||
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
|
||||
|
||||
#define __CLC_IMPL(AS, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_xor_8(p, val); \
|
||||
return __clc_atomic_fetch_xor(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(AS TYPE *p, TYPE val) { \
|
||||
return atom_xor((volatile AS TYPE *)p, val); \
|
||||
}
|
||||
|
||||
#ifdef cl_khr_global_int32_extended_atomics
|
||||
__CLC_IMPL(global, int)
|
||||
__CLC_IMPL(global, unsigned int)
|
||||
#endif // cl_khr_global_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_local_int32_extended_atomics
|
||||
__CLC_IMPL(local, int)
|
||||
__CLC_IMPL(local, unsigned int)
|
||||
#endif // cl_khr_local_int32_extended_atomics
|
||||
|
||||
#ifdef cl_khr_int64_extended_atomics
|
||||
|
||||
__CLC_IMPL(global, long)
|
||||
__CLC_IMPL(global, unsigned long)
|
||||
__CLC_IMPL(local, long)
|
||||
__CLC_IMPL(local, unsigned long)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
#endif // cl_khr_int64_extended_atomics
|
||||
|
||||
@ -6,11 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_add.h>
|
||||
#include <clc/opencl/atomic/atomic_add.h>
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_add(p, val); \
|
||||
return __clc_atomic_fetch_add(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global)
|
||||
|
||||
@ -6,11 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_and.h>
|
||||
#include <clc/opencl/atomic/atomic_and.h>
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_and(p, val); \
|
||||
return __clc_atomic_fetch_and(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global)
|
||||
|
||||
@ -6,12 +6,15 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_compare_exchange.h>
|
||||
#include <clc/opencl/atomic/atomic_cmpxchg.h>
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, \
|
||||
TYPE val) { \
|
||||
return __sync_val_compare_and_swap(p, cmp, val); \
|
||||
return __clc_atomic_compare_exchange(p, cmp, val, __ATOMIC_RELAXED, \
|
||||
__ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global)
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \
|
||||
volatile ADDRSPACE __CLC_GENTYPE *Ptr) { \
|
||||
return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE); \
|
||||
return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_DEFINE_ATOMIC(global)
|
||||
|
||||
@ -6,11 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_max.h>
|
||||
#include <clc/opencl/atomic/atomic_max.h>
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS, OP) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_##OP(p, val); \
|
||||
return __clc_atomic_fetch_max(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global, max)
|
||||
|
||||
@ -6,11 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_min.h>
|
||||
#include <clc/opencl/atomic/atomic_min.h>
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS, OP) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_##OP(p, val); \
|
||||
return __clc_atomic_fetch_min(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global, min)
|
||||
|
||||
@ -6,11 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_or.h>
|
||||
#include <clc/opencl/atomic/atomic_or.h>
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_or(p, val); \
|
||||
return __clc_atomic_fetch_or(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global)
|
||||
|
||||
@ -6,11 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_sub.h>
|
||||
#include <clc/opencl/atomic/atomic_sub.h>
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_sub(p, val); \
|
||||
return __clc_atomic_fetch_sub(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global)
|
||||
|
||||
@ -6,24 +6,19 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/as_type.h>
|
||||
#include <clc/atomic/clc_atomic_exchange.h>
|
||||
#include <clc/opencl/atomic/atomic_xchg.h>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) {
|
||||
return as_float(atomic_xchg((volatile global uint *)p, as_uint(val)));
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) {
|
||||
return as_float(atomic_xchg((volatile local uint *)p, as_uint(val)));
|
||||
}
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_swap_4(p, val); \
|
||||
return __clc_atomic_exchange(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global)
|
||||
__CLC_IMPL(unsigned int, global)
|
||||
__CLC_IMPL(float, global)
|
||||
__CLC_IMPL(int, local)
|
||||
__CLC_IMPL(unsigned int, local)
|
||||
__CLC_IMPL(float, local)
|
||||
#undef __CLC_IMPL
|
||||
|
||||
@ -6,11 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/atomic/clc_atomic_fetch_xor.h>
|
||||
#include <clc/opencl/atomic/atomic_xor.h>
|
||||
|
||||
#define __CLC_IMPL(TYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \
|
||||
return __sync_fetch_and_xor(p, val); \
|
||||
return __clc_atomic_fetch_xor(p, val, __ATOMIC_RELAXED, \
|
||||
__MEMORY_SCOPE_DEVICE); \
|
||||
}
|
||||
|
||||
__CLC_IMPL(int, global)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user