[libclc] Use CLC atomic functions for legacy OpenCL atom/atomic builtins (#168325)

Main changes:
* OpenCL legacy atom/atomic builtins now call CLC atomic functions
(which use Clang __scoped_atomic_*), replacing previous Clang __sync_*
functions.
* Change memory order from seq_cst to relaxed; keep device scope (the
spec permits a scope broader than work-group). LLVM IR for
_Z8atom_decPU3AS1Vi in amdgcn--amdhsa.bc:
  Before:
%2 = atomicrmw volatile sub ptr addrspace(1) %0, i32 1
syncscope("agent") seq_cst
  After:
%2 = atomicrmw volatile sub ptr addrspace(1) %0, i32 1
syncscope("agent") monotonic
* Also adds OpenCL 1.0 atom_* variants without volatile on the pointer.
They are added for backward compatibility.
This commit is contained in:
Wenju He 2025-11-19 13:27:19 +08:00 committed by GitHub
parent f7f41350b4
commit f38cf01fc8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 208 additions and 283 deletions

View File

@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
#include <clc/clcfunc.h>
#include <clc/clctypes.h>
#define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \
_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION(volatile ADDRSPACE TYPE *, TYPE);

View File

@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
#include <clc/clcfunc.h>
#include <clc/clctypes.h>
#define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \
_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION(volatile ADDRSPACE TYPE *, TYPE);

View File

@ -1,4 +1,3 @@
cl_khr_int64_extended_atomics/minmax_helpers.ll
mem_fence/fence.cl
synchronization/barrier.cl
workitem/get_global_offset.cl

View File

@ -1,55 +0,0 @@
;;===----------------------------------------------------------------------===;;
;
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
;;===----------------------------------------------------------------------===;;
define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
ret i64 %0
}
define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
ret i64 %0
}
define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
ret i64 %0
}
define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst
ret i64 %0
}
define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
ret i64 %0
}
define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
ret i64 %0
}
define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
ret i64 %0
}
define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst
ret i64 %0
}

View File

@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_add.h>
#include <clc/opencl/atomic/atom_add.h>
#include <clc/opencl/atomic/atomic_add.h>
#ifdef cl_khr_global_int32_base_atomics
#define __CLC_ATOMIC_OP add
#define __CLC_ATOMIC_ADDRESS_SPACE global
#include "atom_int32_binary.inc"
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
#define __CLC_ATOMIC_OP add
#define __CLC_ATOMIC_ADDRESS_SPACE local
#include "atom_int32_binary.inc"
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_add_8(p, val); \
return __clc_atomic_fetch_add(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_add(AS TYPE *p, TYPE val) { \
return atom_add((volatile AS TYPE *)p, val); \
}
#ifdef cl_khr_global_int32_base_atomics
__CLC_IMPL(global, int)
__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
__CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics

View File

@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_and.h>
#include <clc/opencl/atomic/atom_and.h>
#include <clc/opencl/atomic/atomic_and.h>
#ifdef cl_khr_global_int32_extended_atomics
#define __CLC_ATOMIC_OP and
#define __CLC_ATOMIC_ADDRESS_SPACE global
#include "atom_int32_binary.inc"
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
#define __CLC_ATOMIC_OP and
#define __CLC_ATOMIC_ADDRESS_SPACE local
#include "atom_int32_binary.inc"
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_and_8(p, val); \
return __clc_atomic_fetch_and(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_and(AS TYPE *p, TYPE val) { \
return atom_and((volatile AS TYPE *)p, val); \
}
#ifdef cl_khr_global_int32_extended_atomics
__CLC_IMPL(global, int)
__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
__CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_extended_atomics

View File

@ -6,13 +6,20 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_compare_exchange.h>
#include <clc/opencl/atomic/atom_cmpxchg.h>
#include <clc/opencl/atomic/atomic_cmpxchg.h>
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \
TYPE val) { \
return atomic_cmpxchg(p, cmp, val); \
return __clc_atomic_compare_exchange(p, cmp, val, __ATOMIC_RELAXED, \
__ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(AS TYPE *p, TYPE cmp, TYPE val) { \
return atom_cmpxchg((volatile AS TYPE *)p, cmp, val); \
}
#ifdef cl_khr_global_int32_base_atomics
@ -24,20 +31,11 @@ __CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#undef __CLC_IMPL
#ifdef cl_khr_int64_base_atomics
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \
TYPE val) { \
return __sync_val_compare_and_swap_8(p, cmp, val); \
}
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics

View File

@ -6,13 +6,17 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_dec.h>
#include <clc/opencl/atomic/atom_dec.h>
#include <clc/opencl/atomic/atom_sub.h>
#include <clc/opencl/atomic/atomic_dec.h>
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) { \
return atomic_dec(p); \
return __clc_atomic_dec(p, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(AS TYPE *p) { \
return atom_dec((volatile AS TYPE *)p); \
}
#ifdef cl_khr_global_int32_base_atomics
@ -24,19 +28,11 @@ __CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#undef __CLC_IMPL
#ifdef cl_khr_int64_base_atomics
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) { \
return atom_sub(p, (TYPE)1); \
}
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics

View File

@ -6,13 +6,17 @@
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/atomic/atom_add.h>
#include <clc/atomic/clc_atomic_inc.h>
#include <clc/opencl/atomic/atom_inc.h>
#include <clc/opencl/atomic/atomic_inc.h>
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) { \
return atomic_inc(p); \
return __clc_atomic_inc(p, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(AS TYPE *p) { \
return atom_inc((volatile AS TYPE *)p); \
}
#ifdef cl_khr_global_int32_base_atomics
@ -24,19 +28,11 @@ __CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#undef __CLC_IMPL
#ifdef cl_khr_int64_base_atomics
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) { \
return atom_add(p, (TYPE)1); \
}
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics

View File

@ -1,23 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
#include <clc/utils.h>
#define __CLC_ATOM_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP)( \
volatile AS TYPE * p, TYPE val) { \
return __CLC_XCONCAT(atomic_, __CLC_ATOMIC_OP)(p, val); \
}
__CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, int)
__CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, uint)
#undef __CLC_ATOM_IMPL
#undef __CLC_ATOMIC_OP
#undef __CLC_ATOMIC_ADDRESS_SPACE

View File

@ -6,40 +6,35 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_max.h>
#include <clc/opencl/atomic/atom_max.h>
#include <clc/opencl/atomic/atomic_max.h>
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \
return __clc_atomic_fetch_max(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_max(AS TYPE *p, TYPE val) { \
return atom_max((volatile AS TYPE *)p, val); \
}
#ifdef cl_khr_global_int32_extended_atomics
#define __CLC_ATOMIC_OP max
#define __CLC_ATOMIC_ADDRESS_SPACE global
#include "atom_int32_binary.inc"
__CLC_IMPL(global, int)
__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
#define __CLC_ATOMIC_OP max
#define __CLC_ATOMIC_ADDRESS_SPACE local
#include "atom_int32_binary.inc"
__CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long);
unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long);
unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long *,
unsigned long);
unsigned long
__clc__sync_fetch_and_umax_global_8(volatile global unsigned long *,
unsigned long);
#define __CLC_IMPL(AS, TYPE, OP) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \
return __clc__sync_fetch_and_##OP##_##AS##_8(p, val); \
}
__CLC_IMPL(global, long, max)
__CLC_IMPL(global, unsigned long, umax)
__CLC_IMPL(local, long, max)
__CLC_IMPL(local, unsigned long, umax)
#undef __CLC_IMPL
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#endif // cl_khr_int64_extended_atomics

View File

@ -6,40 +6,35 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_min.h>
#include <clc/opencl/atomic/atom_min.h>
#include <clc/opencl/atomic/atomic_min.h>
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \
return __clc_atomic_fetch_min(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_min(AS TYPE *p, TYPE val) { \
return atom_min((volatile AS TYPE *)p, val); \
}
#ifdef cl_khr_global_int32_extended_atomics
#define __CLC_ATOMIC_OP min
#define __CLC_ATOMIC_ADDRESS_SPACE global
#include "atom_int32_binary.inc"
__CLC_IMPL(global, int)
__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
#define __CLC_ATOMIC_OP min
#define __CLC_ATOMIC_ADDRESS_SPACE local
#include "atom_int32_binary.inc"
__CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long);
unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long);
unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long *,
unsigned long);
unsigned long
__clc__sync_fetch_and_umin_global_8(volatile global unsigned long *,
unsigned long);
#define __CLC_IMPL(AS, TYPE, OP) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \
return __clc__sync_fetch_and_##OP##_##AS##_8(p, val); \
}
__CLC_IMPL(global, long, min)
__CLC_IMPL(global, unsigned long, umin)
__CLC_IMPL(local, long, min)
__CLC_IMPL(local, unsigned long, umin)
#undef __CLC_IMPL
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#endif // cl_khr_int64_extended_atomics

View File

@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_or.h>
#include <clc/opencl/atomic/atom_or.h>
#include <clc/opencl/atomic/atomic_or.h>
#ifdef cl_khr_global_int32_extended_atomics
#define __CLC_ATOMIC_OP or
#define __CLC_ATOMIC_ADDRESS_SPACE global
#include "atom_int32_binary.inc"
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
#define __CLC_ATOMIC_OP or
#define __CLC_ATOMIC_ADDRESS_SPACE local
#include "atom_int32_binary.inc"
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_or_8(p, val); \
return __clc_atomic_fetch_or(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_or(AS TYPE *p, TYPE val) { \
return atom_or((volatile AS TYPE *)p, val); \
}
#ifdef cl_khr_global_int32_extended_atomics
__CLC_IMPL(global, int)
__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
__CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_extended_atomics

View File

@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_sub.h>
#include <clc/opencl/atomic/atom_sub.h>
#include <clc/opencl/atomic/atomic_sub.h>
#ifdef cl_khr_global_int32_base_atomics
#define __CLC_ATOMIC_OP sub
#define __CLC_ATOMIC_ADDRESS_SPACE global
#include "atom_int32_binary.inc"
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
#define __CLC_ATOMIC_OP sub
#define __CLC_ATOMIC_ADDRESS_SPACE local
#include "atom_int32_binary.inc"
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_sub_8(p, val); \
return __clc_atomic_fetch_sub(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(AS TYPE *p, TYPE val) { \
return atom_sub((volatile AS TYPE *)p, val); \
}
#ifdef cl_khr_global_int32_base_atomics
__CLC_IMPL(global, int)
__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
__CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics

View File

@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_exchange.h>
#include <clc/opencl/atomic/atom_xchg.h>
#include <clc/opencl/atomic/atomic_xchg.h>
#ifdef cl_khr_global_int32_base_atomics
#define __CLC_ATOMIC_OP xchg
#define __CLC_ATOMIC_ADDRESS_SPACE global
#include "atom_int32_binary.inc"
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
#define __CLC_ATOMIC_OP xchg
#define __CLC_ATOMIC_ADDRESS_SPACE local
#include "atom_int32_binary.inc"
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \
return __sync_swap_8(p, val); \
return __clc_atomic_exchange(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(AS TYPE *p, TYPE val) { \
return atom_xchg((volatile AS TYPE *)p, val); \
}
#ifdef cl_khr_global_int32_base_atomics
__CLC_IMPL(global, int)
__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
__CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics

View File

@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_xor.h>
#include <clc/opencl/atomic/atom_xor.h>
#include <clc/opencl/atomic/atomic_xor.h>
#ifdef cl_khr_global_int32_extended_atomics
#define __CLC_ATOMIC_OP xor
#define __CLC_ATOMIC_ADDRESS_SPACE global
#include "atom_int32_binary.inc"
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
#define __CLC_ATOMIC_OP xor
#define __CLC_ATOMIC_ADDRESS_SPACE local
#include "atom_int32_binary.inc"
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_xor_8(p, val); \
return __clc_atomic_fetch_xor(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
} \
_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(AS TYPE *p, TYPE val) { \
return atom_xor((volatile AS TYPE *)p, val); \
}
#ifdef cl_khr_global_int32_extended_atomics
__CLC_IMPL(global, int)
__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
__CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
#undef __CLC_IMPL
#endif // cl_khr_int64_extended_atomics

View File

@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_add.h>
#include <clc/opencl/atomic/atomic_add.h>
#define __CLC_IMPL(TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_add(p, val); \
return __clc_atomic_fetch_add(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global)

View File

@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_and.h>
#include <clc/opencl/atomic/atomic_and.h>
#define __CLC_IMPL(TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_and(p, val); \
return __clc_atomic_fetch_and(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global)

View File

@ -6,12 +6,15 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_compare_exchange.h>
#include <clc/opencl/atomic/atomic_cmpxchg.h>
#define __CLC_IMPL(TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, \
TYPE val) { \
return __sync_val_compare_and_swap(p, cmp, val); \
return __clc_atomic_compare_exchange(p, cmp, val, __ATOMIC_RELAXED, \
__ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global)

View File

@ -13,7 +13,7 @@
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \
volatile ADDRSPACE __CLC_GENTYPE *Ptr) { \
return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE); \
return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); \
}
__CLC_DEFINE_ATOMIC(global)

View File

@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_max.h>
#include <clc/opencl/atomic/atomic_max.h>
#define __CLC_IMPL(TYPE, AS, OP) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_##OP(p, val); \
return __clc_atomic_fetch_max(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global, max)

View File

@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_min.h>
#include <clc/opencl/atomic/atomic_min.h>
#define __CLC_IMPL(TYPE, AS, OP) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_##OP(p, val); \
return __clc_atomic_fetch_min(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global, min)

View File

@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_or.h>
#include <clc/opencl/atomic/atomic_or.h>
#define __CLC_IMPL(TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_or(p, val); \
return __clc_atomic_fetch_or(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global)

View File

@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_sub.h>
#include <clc/opencl/atomic/atomic_sub.h>
#define __CLC_IMPL(TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_sub(p, val); \
return __clc_atomic_fetch_sub(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global)

View File

@ -6,24 +6,19 @@
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/as_type.h>
#include <clc/atomic/clc_atomic_exchange.h>
#include <clc/opencl/atomic/atomic_xchg.h>
_CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) {
return as_float(atomic_xchg((volatile global uint *)p, as_uint(val)));
}
_CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) {
return as_float(atomic_xchg((volatile local uint *)p, as_uint(val)));
}
#define __CLC_IMPL(TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \
return __sync_swap_4(p, val); \
return __clc_atomic_exchange(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global)
__CLC_IMPL(unsigned int, global)
__CLC_IMPL(float, global)
__CLC_IMPL(int, local)
__CLC_IMPL(unsigned int, local)
__CLC_IMPL(float, local)
#undef __CLC_IMPL

View File

@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
#include <clc/atomic/clc_atomic_fetch_xor.h>
#include <clc/opencl/atomic/atomic_xor.h>
#define __CLC_IMPL(TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \
return __sync_fetch_and_xor(p, val); \
return __clc_atomic_fetch_xor(p, val, __ATOMIC_RELAXED, \
__MEMORY_SCOPE_DEVICE); \
}
__CLC_IMPL(int, global)